def change_font_in_declaration(style, old_name, new_name=None):
    # Replace (or remove, when new_name is None) the font family old_name in
    # both the 'font-family' property and the 'font' shorthand of a CSS style
    # declaration. Returns True if the declaration was modified.
    changed = False
    ff = style.getProperty('font-family')
    if ff is not None:
        fams = parse_font_family(css_text(ff.propertyValue))
        # Substitute old_name; filter(None, ...) drops it entirely when
        # new_name is None or empty
        nfams = list(filter(None, [new_name if x == old_name else x for x in fams]))
        if fams != nfams:
            if nfams:
                ff.propertyValue.cssText = serialize_font_family(nfams)
            else:
                # No families left: remove the property altogether
                style.removeProperty(ff.name)
            changed = True
    ff = style.getProperty('font')
    if ff is not None:
        # The 'font' shorthand embeds a font-family list; re-serialize the
        # whole shorthand after substitution
        props = parse_font(css_text(ff.propertyValue))
        fams = props.get('font-family') or []
        nfams = list(filter(None, [new_name if x == old_name else x for x in fams]))
        if fams != nfams:
            props['font-family'] = nfams
            if nfams:
                ff.propertyValue.cssText = serialize_font(props)
            else:
                style.removeProperty(ff.name)
            changed = True
    return changed
def random_user_agent(choose=None, allow_ie=True):
    '''
    Return a desktop browser user agent string. When choose is None a random
    one is picked, otherwise the candidate at index choose is returned.
    Mobile agents are always excluded; IE/Edge agents are excluded unless
    allow_ie is True.
    '''
    from calibre.utils.random_ua import common_user_agents
    candidates = [ua for ua in common_user_agents() if not is_mobile_ua(ua)]
    if not allow_ie:
        candidates = [ua for ua in candidates if 'Trident/' not in ua and 'Edge/' not in ua]
    if choose is None:
        return random.choice(candidates)
    return candidates[choose]
def __init__(self, libraries, opts, testing=False, notify_changes=None):
    # libraries may be an already constructed LibraryBroker or a collection
    # of library paths from which one is created.
    self.opts = opts
    self.library_broker = libraries if isinstance(libraries, LibraryBroker) else LibraryBroker(libraries)
    self.testing = testing
    self.lock = Lock()
    self.user_manager = UserManager(opts.userdb)
    # Comma separated option strings -> frozensets of non-empty field names
    self.ignored_fields = frozenset(filter(None, (x.strip() for x in (opts.ignored_fields or '').split(','))))
    self.displayed_fields = frozenset(filter(None, (x.strip() for x in (opts.displayed_fields or '').split(','))))
    # Optional callback used to broadcast database change events
    self._notify_changes = notify_changes
def render_categories(opts, db, category_data):
    # Build the Tag browser category tree for the given category data.
    # Returns a dict with the tree root and a map of node id -> item metadata.
    items = {}
    with db.safe_read_lock:
        root, node_id_map, category_nodes, recount_nodes = create_toplevel_tree(category_data, items, db.field_metadata, opts)
        fillout_tree(root, items, node_id_map, category_nodes, category_data, db.field_metadata, opts, db.fields['rating'].book_value_map)
    for node in recount_nodes:
        item = items[node['id']]
        # Recount: number of non-category descendants (i.e. actual items)
        item['count'] = sum(1 for x in iternode_descendants(node) if not items[x['id']].get('is_category', False))
    if opts.hidden_categories:
        # We have to remove hidden categories after all processing is done as
        # items from a hidden category could be in a user category
        root['children'] = list(filter((lambda child:items[child['id']]['category'] not in opts.hidden_categories), root['children']))
    if opts.hide_empty_categories:
        root['children'] = list(filter((lambda child:items[child['id']]['count'] > 0), root['children']))
    return {'root':root, 'item_map': items}
def extract_images(self, picts):
    '''
    Extract embedded images from the raw RTF file at the path ``picts``.

    Each {\\pict ...} group is hex decoded and written out as a numbered
    image file; returns the result of self.convert_images() on the map of
    image number -> file name.
    '''
    from binascii import unhexlify
    from calibre.utils.imghdr import what
    self.log('Extracting images...')
    with open(picts, 'rb') as f:
        raw = f.read()
    # The file is read in binary mode, so byte-oriented patterns must be
    # used throughout (str patterns on bytes raise TypeError on python 3)
    picts = filter(len, re.findall(br'\{\\pict([^}]+)\}', raw))
    # Strip everything that is not a hex digit (whitespace, control words).
    # Named non_hex to avoid shadowing the builtin hex().
    non_hex = re.compile(br'[^a-fA-F0-9]')
    encs = [non_hex.sub(b'', pict) for pict in picts]
    count = 0
    imap = {}
    for enc in encs:
        if len(enc) % 2 == 1:
            # Drop a trailing half byte so the hex decode cannot fail
            enc = enc[:-1]
        # unhexlify() works on both python 2 and 3, unlike str.decode('hex')
        data = unhexlify(enc)
        fmt = what(None, data)
        if fmt is None:
            fmt = 'wmf'
        count += 1
        name = u'%04d.%s' % (count, fmt)
        with open(name, 'wb') as f:
            f.write(data)
        imap[count] = name
        # with open(name+'.hex', 'wb') as f:
        #     f.write(enc)
    return self.convert_images(imap)
def polish_index_markup(index, blocks):
    # Normalize generated index markup: tag each block as an index entry,
    # split colon separated multi-level entries into nested spans and merge
    # blocks that share the same main entry.
    # Blocks are in reverse order at this point
    path_map = {}
    ldict = {}
    for block in blocks:
        cls = block.get('class', '') or ''
        block.set('class', (cls + ' index-entry').lstrip())
        a = block.xpath('descendant::a[1]')
        text = ''
        if a:
            text = etree.tostring(a[0], method='text', with_tail=False, encoding=unicode_type).strip()
        if ':' in text:
            # Colon separated text denotes a multi-level index entry
            path_map[block] = parts = list(filter(None, (x.strip() for x in text.split(':'))))
            if len(parts) > 1:
                split_up_block(block, a[0], text, parts, ldict)
        else:
            # try using a span all the time
            # NOTE(review): assumes the block has at least one <a> descendant
            # here (a[0]) — confirm against callers
            path_map[block] = [text]
            parent = a[0].getparent()
            span = parent.makeelement('span', style='display:block; margin-left: 0em')
            parent.append(span)
            span.append(a[0])
            ldict[span] = 0
        for br in block.xpath('descendant::br'):
            br.tail = None

    # We want a single block for each main entry
    prev_block = blocks[0]
    for block in blocks[1:]:
        pp, pn = path_map[prev_block], path_map[block]
        if pp[0] == pn[0]:
            merge_blocks(prev_block, block, 0, 0, pn, ldict)
        else:
            prev_block = block
def __call__(self, query, limit=None):
    # Score all items against query using the shared worker pool; returns an
    # OrderedDict mapping matched item -> match positions, best score first,
    # optionally truncated to limit entries.
    query = normalize('NFC', unicode_type(query))
    with wlock:
        for i, scorer in enumerate(self.scorers):
            workers[0].requests.put((i, scorer, query))
        if self.sort_keys is None:
            # Computed lazily on first query
            self.sort_keys = {
                i: primary_sort_key(x)
                for i, x in enumerate(self.items)
            }
        num = len(self.task_maps)
        scores, positions = {}, {}
        error = None
        # Collect one result per task; failures are remembered and raised
        # after all results have been drained
        while num > 0:
            ok, x = workers[0].results.get()
            num -= 1
            if ok:
                task_num, vals = x
                # Translate per-task indices back to global item indices
                task_map = self.task_maps[task_num]
                for i, (score, pos) in enumerate(vals):
                    item = task_map[i]
                    scores[item] = score
                    positions[item] = pos
            else:
                error = x
    if error is not None:
        raise Exception('Failed to score items: %s' % error)
    # Sort by negated score so higher scores come first
    items = sorted(((-scores[i], item, positions[i]) for i, item in enumerate(self.items)), key=itemgetter(0))
    if limit is not None:
        del items[limit:]
    # filter(itemgetter(0), ...) drops zero-score (non-matching) entries
    return OrderedDict(x[1:] for x in filter(itemgetter(0), items))
def parse_uri(uri, parse_query=True):
    # Parse a raw Request-URI (bytes) into (scheme, path-tuple, query).
    # Raises HTTPSimpleResponse(BAD_REQUEST) on any malformed component.
    scheme, authority, path = parse_request_uri(uri)
    if path is None:
        raise HTTPSimpleResponse(http_client.BAD_REQUEST, "No path component")
    if b'#' in path:
        raise HTTPSimpleResponse(http_client.BAD_REQUEST, "Illegal #fragment in Request-URI.")

    if scheme:
        try:
            scheme = scheme.decode('ascii')
        except ValueError:
            raise HTTPSimpleResponse(http_client.BAD_REQUEST, 'Un-decodeable scheme')

    path, qs = path.partition(b'?')[::2]
    if parse_query:
        try:
            query = MultiDict.create_from_query_string(qs)
        except Exception:
            raise HTTPSimpleResponse(http_client.BAD_REQUEST, 'Unparseable query string')
    else:
        query = None

    try:
        # Unquote each segment separately so that quoted slashes (%2F) are
        # preserved as literal slashes inside a single path component
        path = '%2F'.join(unquote(x).decode('utf-8') for x in quoted_slash.split(path))
    except ValueError as e:
        raise HTTPSimpleResponse(http_client.BAD_REQUEST, as_unicode(e))
    path = tuple(filter(None, (x.replace('%2F', '/') for x in path.split('/'))))
    return scheme, path, query
def process_dir(self, dirpath, filenames, book_id):
    # Collect metadata and format info for one book directory. Directories
    # whose metadata.opf application_id does not match the expected book id
    # are recorded in self.mismatched_dirs instead of self.books.
    book_id = int(book_id)
    formats = list(filter(self.is_ebook_file, filenames))
    fmts = [os.path.splitext(x)[1][1:].upper() for x in formats]
    sizes = [os.path.getsize(os.path.join(dirpath, x)) for x in formats]
    names = [os.path.splitext(x)[0] for x in formats]
    opf = os.path.join(dirpath, 'metadata.opf')
    mi = OPF(opf, basedir=dirpath).to_book_metadata()
    timestamp = os.path.getmtime(opf)
    path = os.path.relpath(dirpath, self.src_library_path).replace(os.sep, '/')

    if int(mi.application_id) == book_id:
        self.books.append({
            'mi': mi,
            'timestamp': timestamp,
            'formats': list(zip(fmts, sizes, names)),
            'id': book_id,
            'dirpath': dirpath,
            'path': path,
        })
    else:
        self.mismatched_dirs.append(dirpath)

    alm = mi.get('author_link_map', {})
    for author, link in iteritems(alm):
        # Keep the most recent link for each author. Note: this rebinds
        # `timestamp`, shadowing the opf mtime computed above.
        existing_link, timestamp = self.authors_links.get(author, (None, None))
        if existing_link is None or existing_link != link and timestamp < mi.timestamp:
            self.authors_links[author] = (link, mi.timestamp)
def get_overrides(self, r, c, num_of_rows, num_of_cols_in_row):
    'List of possible overrides for the given para'
    overrides = ['wholeTable']

    def band(pos, size):
        # Returns 1 for a cell in an odd band, 2 for an even band
        return 1 if ((pos - (pos % size)) // size) % 2 == 1 else 2

    if c is not None:
        overrides.append('band%dVert' % band(c, self.table_style.col_band_size))
    overrides.append('band%dHorz' % band(r, self.table_style.row_band_size))

    # According to the OOXML spec columns should have higher override
    # priority than rows, but Word seems to do it the other way around.
    if c is not None:
        if c == 0:
            overrides.append('firstCol')
        if c >= num_of_cols_in_row - 1:
            overrides.append('lastCol')
    if r == 0:
        overrides.append('firstRow')
    if r >= num_of_rows - 1:
        overrides.append('lastRow')

    if c is not None:
        first_col = c == 0
        last_col = c == num_of_cols_in_row - 1
        # Corner cells of the table
        if r == 0:
            if first_col:
                overrides.append('nwCell')
            if last_col:
                overrides.append('neCell')
        if r == num_of_rows - 1:
            if first_col:
                overrides.append('swCell')
            if last_col:
                overrides.append('seCell')
    return tuple(filter(self.override_allowed, overrides))
def find_identical_books(mi, data):
    '''
    Return the set of book ids in data that match mi on all authors, on
    fuzzy title and (when mi specifies languages) on languages.
    '''
    author_map, aid_map, title_map, lang_map = data
    candidates = None
    # Intersect the sets of books written by each author; an unknown author
    # means there can be no match at all
    for author in mi.authors:
        author_ids = author_map.get(icu_lower(author))
        if author_ids is None:
            return set()
        by_this_author = {bid for aid in author_ids for bid in aid_map.get(aid, ())}
        candidates = by_this_author if candidates is None else candidates & by_this_author
        if not candidates:
            return set()
    matched = set()
    wanted_title = fuzzy_title(mi.title)
    for bid in candidates:
        if fuzzy_title(title_map.get(bid, '')) == wanted_title:
            matched.add(bid)
    # Language filter: 'und' (undetermined) entries are ignored
    langq = tuple(filter(lambda x: x and x != 'und', map(canonicalize_lang, mi.languages or ())))
    if not langq:
        return matched

    def lang_matches(bid):
        # Books with no stored languages match any language query
        stored = lang_map.get(bid)
        return not stored or langq == stored

    return {bid for bid in matched if lang_matches(bid)}
def multiple_sequences(expr, root):
    # Get all values for sequence elements matching expr, ensuring the
    # returned list contains distinct non-null elements preserving their
    # order.
    collected = []
    for node in XPath(expr)(root):
        collected.extend(read_sequence(node))
    return [x for x in uniq(collected) if x]
def tokenize(self): """Main class for handling other methods. Reads the file \ , uses method self.sub_reg to make basic substitutions,\ and process tokens by itself""" # read with open_for_read(self.__file) as read_obj: input_file = read_obj.read() # process simple replacements and split giving us a correct list # remove '' and \n in the process tokens = self.__sub_reg_split(input_file) # correct unicode tokens = map(self.__unicode_process, tokens) # remove empty items created by removing \uc tokens = list(filter(lambda x: len(x) > 0, tokens)) # write with open_for_write(self.__write_to) as write_obj: write_obj.write('\n'.join(tokens)) # Move and copy copy_obj = copy.Copy(bug_handler=self.__bug_handler) if self.__copy: copy_obj.copy_file(self.__write_to, "tokenize.data") copy_obj.rename(self.__write_to, self.__file) os.remove(self.__write_to)
def create_themeball(report, progress=None, abort=None):
    '''
    Build a distributable icon theme archive: optimize all images in
    parallel, zip them uncompressed, xz-compress that zip and wrap it in an
    outer zip with the theme metadata and cover.

    :param progress: optional callable(step, message)
    :param abort: optional threading.Event checked between stages
    :return: (theme bytes, prefix), or None/(None, None) if aborted
    '''
    pool = ThreadPool(processes=cpu_count())
    buf = BytesIO()
    num = count()
    error_occurred = Event()

    def optimize(name):
        # Runs on pool threads. Returns sys.exc_info() on failure, None on
        # success or skip.
        if abort is not None and abort.is_set():
            return
        if error_occurred.is_set():
            return
        try:
            i = next(num)
            if progress is not None:
                progress(i, _('Optimizing %s') % name)
            srcpath = os.path.join(report.path, name)
            ext = srcpath.rpartition('.')[-1].lower()
            if ext == 'png':
                optimize_png(srcpath)
            elif ext in ('jpg', 'jpeg'):
                optimize_jpeg(srcpath)
        except Exception:
            # Signal the other workers to stop doing useless work; without
            # this the error_occurred check above could never fire
            error_occurred.set()
            return sys.exc_info()

    errors = tuple(filter(None, pool.map(optimize, tuple(report.name_map))))
    pool.close(), pool.join()
    if abort is not None and abort.is_set():
        return
    if errors:
        e = errors[0]
        reraise(*e)

    if progress is not None:
        progress(next(num), _('Creating theme file'))
    with ZipFile(buf, 'w') as zf:
        # Store uncompressed; the whole zip is xz compressed below
        for name in report.name_map:
            srcpath = os.path.join(report.path, name)
            with lopen(srcpath, 'rb') as f:
                zf.writestr(name, f.read(), compression=ZIP_STORED)
    buf.seek(0)
    out = BytesIO()
    if abort is not None and abort.is_set():
        return None, None
    if progress is not None:
        progress(next(num), _('Compressing theme file'))
    compress(buf, out, level=9)
    buf = BytesIO()
    prefix = report.name
    if abort is not None and abort.is_set():
        return None, None
    with ZipFile(buf, 'w') as zf:
        with lopen(os.path.join(report.path, THEME_METADATA), 'rb') as f:
            zf.writestr(prefix + '/' + THEME_METADATA, f.read())
        zf.writestr(prefix + '/' + THEME_COVER, create_cover(report))
        zf.writestr(prefix + '/' + 'icons.zip.xz', out.getvalue(), compression=ZIP_STORED)
    if progress is not None:
        progress(next(num), _('Finished'))
    return buf.getvalue(), prefix
def ACQUISITION_ENTRY(book_id, updated, request_context):
    # Build an OPDS acquisition feed <entry> element for one book, including
    # extra metadata rendered as xhtml content and acquisition/cover links.
    # NOTE(review): the `updated` parameter is not referenced in this body —
    # confirm whether it is intentionally unused.
    field_metadata = request_context.db.field_metadata
    mi = request_context.db.get_metadata(book_id)
    extra = []
    if mi.rating > 0:
        rating = rating_to_stars(mi.rating)
        extra.append(_('RATING: %s<br />')%rating)
    if mi.tags:
        extra.append(_('TAGS: %s<br />')%xml(format_tag_string(mi.tags, None)))
    if mi.series:
        extra.append(_('SERIES: %(series)s [%(sidx)s]<br />')% dict(series=xml(mi.series), sidx=fmt_sidx(float(mi.series_index))))
    # Include every displayable ignorable field, formatted per its datatype
    for key in filter(request_context.ctx.is_field_displayable, field_metadata.ignorable_field_keys()):
        name, val = mi.format_field(key)
        if val:
            fm = field_metadata[key]
            datatype = fm['datatype']
            if datatype == 'text' and fm['is_multiple']:
                extra.append('%s: %s<br />'% (xml(name), xml(format_tag_string(val, fm['is_multiple']['ui_to_list'], joinval=fm['is_multiple']['list_to_ui']))))
            elif datatype == 'comments' or (fm['datatype'] == 'composite' and fm['display'].get('contains_html', False)):
                extra.append('%s: %s<br />'%(xml(name), comments_to_html(unicode_type(val))))
            else:
                extra.append('%s: %s<br />'%(xml(name), xml(unicode_type(val))))
    if mi.comments:
        comments = comments_to_html(mi.comments)
        extra.append(comments)
    if extra:
        extra = html_to_lxml('\n'.join(extra))
    ans = E.entry(TITLE(mi.title), E.author(E.name(authors_to_string(mi.authors))), ID('urn:uuid:' + mi.uuid), UPDATED(mi.last_modified), E.published(mi.timestamp.isoformat()))
    if mi.pubdate and not is_date_undefined(mi.pubdate):
        ans.append(ans.makeelement('{%s}date' % DC_NS))
        ans[-1].text = mi.pubdate.isoformat()
    if len(extra):
        ans.append(E.content(extra, type='xhtml'))
    # Helper for /get URLs bound to this book and library
    get = partial(request_context.ctx.url_for, '/get', book_id=book_id, library_id=request_context.library_id)
    if mi.formats:
        fm = mi.format_metadata
        # One acquisition link per format with a known media type
        for fmt in mi.formats:
            fmt = fmt.lower()
            mt = guess_type('a.'+fmt)[0]
            if mt:
                link = E.link(type=mt, href=get(what=fmt), rel="http://opds-spec.org/acquisition")
                ffm = fm.get(fmt.upper())
                if ffm:
                    link.set('length', str(ffm['size']))
                    link.set('mtime', ffm['mtime'].isoformat())
                ans.append(link)
    ans.append(E.link(type='image/jpeg', href=get(what='cover'), rel="http://opds-spec.org/cover"))
    ans.append(E.link(type='image/jpeg', href=get(what='thumb'), rel="http://opds-spec.org/thumbnail"))
    return ans
def map_authors(authors, rules=()):
    '''
    Apply the given author mapping rules to each author, returning the
    de-duplicated list of resulting names.
    '''
    if not authors:
        return []
    if not rules:
        return list(authors)
    mapped = []
    for author in authors:
        mapped.extend(apply_rules(author, rules))
    return uniq([x for x in mapped if x])
def change_restriction(username):
    # Interactive console flow to view and change a user's library access
    # restrictions. Uses the module-level UserManager `m` and the
    # choice/get_input prompt helpers.
    r = m.restrictions(username)
    if r is None:
        raise SystemExit('The user {} does not exist'.format(username))
    if r['allowed_library_names']:
        prints(
            _('{} is currently only allowed to access the libraries named: {}')
            .format(username, ', '.join(r['allowed_library_names'])))
    if r['blocked_library_names']:
        prints(
            _('{} is currently not allowed to access the libraries named: {}')
            .format(username, ', '.join(r['blocked_library_names'])))
    if r['library_restrictions']:
        prints(
            _('{} has the following additional per-library restrictions:')
            .format(username))
        for k, v in iteritems(r['library_restrictions']):
            prints(k + ':', v)
    else:
        prints(_('{} has no additional per-library restrictions').format(username))
    c = choice(
        choices=[
            _('Allow access to all libraries'),
            _('Allow access to only specified libraries'),
            _('Allow access to all, except specified libraries'),
            _('Change per-library restrictions'),
            _('Cancel')])
    if c == 0:
        # Clear all restrictions
        m.update_user_restrictions(username, {})
    elif c == 3:
        # Edit per-library search restrictions, looping until the user stops
        while True:
            library = get_input(_('Enter the name of the library:'))
            if not library:
                break
            prints(
                _(
                    'Enter a search expression, access will be granted only to books matching this expression.'
                    ' An empty expression will grant access to all books.'))
            plr = get_input(_('Search expression:'))
            if plr:
                r['library_restrictions'][library] = plr
            else:
                # Empty expression removes the restriction for this library
                r['library_restrictions'].pop(library, None)
            m.update_user_restrictions(username, r)
            if get_input(_('Another restriction?') + ' (y/n):') != 'y':
                break
    elif c == 4:
        # Cancel
        pass
    else:
        # Replace the allowed (c == 1) or blocked (c == 2) library list
        names = get_input(_('Enter a comma separated list of library names:'))
        names = list(filter(None, [x.strip() for x in names.split(',')]))
        w = 'allowed_library_names' if c == 1 else 'blocked_library_names'
        t = _('Allowing access only to libraries: {}') if c == 1 else _(
            'Allowing access to all libraries, except: {}')
        prints(t.format(', '.join(names)))
        m.update_user_restrictions(username, {w: names})
def map_tags(tags, rules=()):
    '''
    Apply the given tag mapping rules to each tag, returning the
    de-duplicated list of resulting tags.
    '''
    if not tags:
        return []
    if not rules:
        return list(tags)
    # Pre-compile a matcher for every rule
    compiled = [(rule, matcher(rule)) for rule in rules]
    mapped = []
    for tag in tags:
        mapped.extend(apply_rules(tag, compiled))
    return uniq([x for x in mapped if x])
def completer(text, num):
    # readline-style completion callback (closure over self): on a fresh
    # request, ask the REPL process for completions and cache them, then
    # hand them out one per call until the list is exhausted.
    if self.completions is None:
        self.to_repl.put(('complete', text))
        self.completions = list(filter(None, self.get_from_repl()))
        if not self.completions:
            return None
    try:
        return self.completions[num]
    except (IndexError, TypeError, AttributeError, KeyError):
        # Out of candidates: reset the cache for the next completion request
        self.completions = None
def custom_dictionaries(reread=False):
    '''
    Return the frozenset of user-installed Dictionary objects, scanning the
    config directory on first use (or again when reread is True).
    '''
    global _custom
    if _custom is None or reread:
        dics = []
        for lc in glob.glob(os.path.join(config_dir, 'dictionaries', '*/locales')):
            # Use a context manager so the locales file is always closed
            # (the original leaked the file handle)
            with open(lc, 'rb') as f:
                locales = list(filter(None, f.read().decode('utf-8').splitlines()))
            try:
                # First line is the display name, second the primary locale
                name, locale, locales = locales[0], locales[1], locales[1:]
            except IndexError:
                continue
            base = os.path.dirname(lc)
            ploc = parse_lang_code(locale)
            if ploc.countrycode is None:
                continue
            dics.append(Dictionary(
                ploc,
                # Only locales with a country code are usable as aliases
                frozenset(filter(lambda x: x.countrycode is not None, map(parse_lang_code, locales))),
                os.path.join(base, '%s.dic' % locale),
                os.path.join(base, '%s.aff' % locale),
                False, name, os.path.basename(base)))
        _custom = frozenset(dics)
    return _custom
def normalize_languages(opf_languages, mi_languages):
    ' Preserve original country codes and use 2-letter lang codes where possible '
    def parse(x):
        try:
            return parse_lang_code(x)
        except ValueError:
            return None

    # Remember the country code used for each language in the OPF so it can
    # be re-attached to bare language codes coming from mi
    parsed_opf = filter(None, map(parse, opf_languages))
    cc_map = {c.langcode: c.countrycode for c in parsed_opf}

    def norm(x):
        code = x.langcode
        country = x.countrycode or cc_map.get(code, None)
        # Prefer the 2-letter form of the language code when one exists
        code = lang_as_iso639_1(code) or code
        return code + '-' + country if country else code

    return [norm(x) for x in filter(None, map(parse, mi_languages))]
def line_numbers(self):
    # Lazily query the JS preview integration for the line numbers,
    # caching the sorted, de-duplicated result
    if self._line_numbers is None:
        def to_int(raw):
            try:
                return int(raw)
            except (TypeError, ValueError):
                return None

        raw_vals = self.mainFrame().evaluateJavaScript('window.calibre_preview_integration.line_numbers()')
        parsed = [n for n in map(to_int, raw_vals) if n is not None]
        self._line_numbers = sorted(uniq(parsed))
    return self._line_numbers
def set_tags(root, prefixes, refines, val):
    # Drop all existing dc:subject elements, then add one element per
    # unique non-empty tag in val
    for dc in XPath('./opf:metadata/dc:subject')(root):
        remove_element(dc, refines)
    metadata = XPath('./opf:metadata')(root)[0]
    if val:
        for tag in uniq(list(filter(None, val))):
            elem = metadata.makeelement(DC('subject'))
            elem.text = normalize_whitespace(tag)
            if elem.text:
                metadata.append(elem)
def contractions(col=None):
    '''
    Return the frozenset of contraction sequences for the given collator
    (defaulting to the global collator), with the result cached per
    collator object.
    '''
    global _cmap
    col = col or _collator
    if col is None:
        col = collator()
    # Look up using the collator object itself, matching the key used when
    # storing below. (Looking up `collator` — the function — meant the
    # cache could never hit.)
    ans = _cmap.get(col, None)
    if ans is None:
        ans = col.contractions()
        ans = frozenset(filter(None, ans))
        _cmap[col] = ans
    return ans
def builtin_dictionaries():
    '''
    Return the frozenset of Dictionary objects shipped with the
    application, scanning the resources directory on first use.
    '''
    global _builtins
    if _builtins is None:
        dics = []
        for lc in glob.glob(os.path.join(P('dictionaries', allow_user_override=False), '*/locales')):
            # Use a context manager so the locales file is always closed
            # (the original leaked the file handle)
            with open(lc, 'rb') as f:
                locales = list(filter(None, f.read().decode('utf-8').splitlines()))
            # First locale is the primary one that names the .dic/.aff files
            locale = locales[0]
            base = os.path.dirname(lc)
            dics.append(Dictionary(
                parse_lang_code(locale), frozenset(map(parse_lang_code, locales)),
                os.path.join(base, '%s.dic' % locale),
                os.path.join(base, '%s.aff' % locale),
                True, None, None))
        _builtins = frozenset(dics)
    return _builtins
def remove_property_value(prop, predicate):
    '''
    Remove the Values that match the predicate from this property. If all
    values of the property would be removed, the property is removed from
    its parent instead. Note that this means the property must have a
    parent (a CSSStyleDeclaration).

    :param prop: a cssutils Property
    :param predicate: callable applied to each Value of prop.propertyValue
    :return: True if at least one value matched (and was removed)
    '''
    removed_vals = list(filter(predicate, prop.propertyValue))
    if len(removed_vals) == len(prop.propertyValue):
        # Every value would be removed: drop the whole property
        prop.parent.removeProperty(prop.name)
    else:
        # Rebuild the value text with the matched values stripped out
        x = css_text(prop.propertyValue)
        for v in removed_vals:
            x = x.replace(css_text(v), '').strip()
        prop.propertyValue.cssText = x
    return bool(removed_vals)
def run(self, archive):
    # If the archive contains a single supported ebook file, extract it and
    # return the path of the extracted file; comic archives are copied over
    # as CBR/CBZ; in all other cases the original archive path is returned.
    from calibre.utils.zipfile import ZipFile
    is_rar = archive.lower().endswith('.rar')
    if is_rar:
        from calibre.utils.unrar import extract_member, names
    else:
        zf = ZipFile(archive, 'r')

    if is_rar:
        fnames = list(names(archive))
    else:
        fnames = zf.namelist()

    def fname_ok(fname):
        # Filter out metadata/junk entries that should not count as content
        bn = os.path.basename(fname).lower()
        if bn == 'thumbs.db':
            return False
        if '.' not in bn:
            return False
        if bn.rpartition('.')[-1] in {'diz', 'nfo'}:
            return False
        if '__MACOSX' in fname.split('/'):
            return False
        return True

    fnames = list(filter(fname_ok, fnames))
    if is_comic(fnames):
        # Comics: copy the whole archive with the appropriate extension
        ext = '.cbr' if is_rar else '.cbz'
        of = self.temporary_file('_archive_extract'+ext)
        with open(archive, 'rb') as f:
            of.write(f.read())
        of.close()
        return of.name
    if len(fnames) > 1 or not fnames:
        # Not a single-file archive: leave it as-is
        return archive
    fname = fnames[0]
    ext = os.path.splitext(fname)[1][1:]
    if ext.lower() not in {
            'lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf', 'mp3', 'pdb',
            'azw', 'azw1', 'azw3', 'fb2', 'docx', 'doc', 'odt'}:
        return archive

    of = self.temporary_file('_archive_extract.'+ext)
    with closing(of):
        if is_rar:
            data = extract_member(archive, match=None, name=fname)[1]
            of.write(data)
        else:
            of.write(zf.read(fname))
    return of.name
def load_icon_resource(icon_resource, as_data=False, size=ICON_SIZE):
    # icon_resource has the form 'path-to-module,icon-index'
    if not icon_resource:
        return
    pieces = tuple(filter(None, re.split(r',([-0-9]+$)', icon_resource)))
    if len(pieces) != 2:
        return
    module, idx = pieces[0], int(pieces[1])
    if module.startswith('"') and module.endswith('"'):
        # Quoted path, possibly followed by arguments: keep only the command
        module = split_commandline(module)[0]
    try:
        return load_icon(module, idx, as_data=as_data, size=size)
    except Exception:
        # Fall back to the simpler loader if the full one fails
        return simple_load_icon(module, idx, as_data=as_data, size=size)
def __sub_reg_split(self,input_file): input_file = self.__replace_spchar.mreplace(input_file) # this is for older RTF input_file = self.__par_exp.sub(r'\n\\par \n', input_file) input_file = self.__cwdigit_exp.sub(r"\g<1>\n\g<2>", input_file) input_file = self.__cs_ast.sub(r"\g<1>", input_file) input_file = self.__ms_hex_exp.sub(r"\\mshex0\g<1> ", input_file) input_file = self.__utf_ud.sub(r"\\{\\uc0 \g<1>\\}", input_file) # remove \n in bin data input_file = self.__bin_exp.sub(lambda x: x.group().replace('\n', '') + '\n', input_file) # split tokens = re.split(self.__splitexp, input_file) # remove empty tokens and \n return list(filter(lambda x: len(x) > 0 and x != '\n', tokens))
def get_plugin_updates_available(raise_error=False):
    '''
    API exposed to read whether there are updates available for any of the
    installed user plugins.

    Returns None if no updates found.
    Returns list(DisplayPlugin) of installed plugins that have a new
    version.
    '''
    if not has_external_plugins():
        return None
    display_plugins = read_available_plugins(raise_error=raise_error)
    # Rely on truthiness instead of explicit length comparisons
    if display_plugins:
        update_plugins = list(filter(filter_upgradeable_plugins, display_plugins))
        if update_plugins:
            return update_plugins
    return None
def polish_index_markup(index, blocks):
    # Normalize generated index markup: tag each block as an index entry,
    # split colon separated multi-level entries into nested spans and merge
    # blocks that share the same main entry.
    # Blocks are in reverse order at this point
    path_map = {}
    ldict = {}
    for block in blocks:
        cls = block.get('class', '') or ''
        block.set('class', (cls + ' index-entry').lstrip())
        a = block.xpath('descendant::a[1]')
        text = ''
        if a:
            text = etree.tostring(a[0], method='text', with_tail=False, encoding='unicode').strip()
        if ':' in text:
            # Colon separated text denotes a multi-level index entry
            path_map[block] = parts = list(
                filter(None, (x.strip() for x in text.split(':'))))
            if len(parts) > 1:
                split_up_block(block, a[0], text, parts, ldict)
        else:
            # try using a span all the time
            # NOTE(review): assumes the block has at least one <a> descendant
            # here (a[0]) — confirm against callers
            path_map[block] = [text]
            parent = a[0].getparent()
            span = parent.makeelement('span', style='display:block; margin-left: 0em')
            parent.append(span)
            span.append(a[0])
            ldict[span] = 0
        for br in block.xpath('descendant::br'):
            br.tail = None

    # We want a single block for each main entry
    prev_block = blocks[0]
    for block in blocks[1:]:
        pp, pn = path_map[prev_block], path_map[block]
        if pp[0] == pn[0]:
            merge_blocks(prev_block, block, 0, 0, pn, ldict)
        else:
            prev_block = block
def find_font_for_text(self, text, allowed_families={'serif', 'sans-serif'},
        preferred_families=('serif', 'sans-serif', 'monospace', 'cursive', 'fantasy')):
    '''
    Find a font on the system capable of rendering the given text.

    Returns a font family (as given by fonts_for_family()) that has a
    "normal" font and that can render the supplied text. If no such font
    exists, returns None.

    :return: (family name, faces) or None, None
    '''
    # NOTE(review): the mutable default for allowed_families is shared
    # across calls; safe here because it is only read, never mutated.
    from calibre.utils.fonts.utils import (supports_text,
            panose_to_css_generic_family, get_printable_characters)
    if not isinstance(text, unicode_type):
        raise TypeError(u'%r is not unicode'%text)
    # Only printable characters matter for rendering support
    text = get_printable_characters(text)
    found = {}

    def filter_faces(font):
        # A face qualifies if its raw font data claims glyph coverage for
        # every character in text; failure to read the data disqualifies it
        try:
            raw = self.get_font_data(font)
            return supports_text(raw, text)
        except:
            pass
        return False

    for family in self.find_font_families():
        faces = list(filter(filter_faces, self.fonts_for_family(family)))
        if not faces:
            continue
        generic_family = panose_to_css_generic_family(faces[0]['panose'])
        if generic_family in allowed_families or generic_family == preferred_families[0]:
            # First acceptable family wins outright
            return (family, faces)
        elif generic_family not in found:
            # Otherwise remember the first candidate per generic family
            found[generic_family] = (family, faces)

    for f in preferred_families:
        if f in found:
            return found[f]
    return None, None
def get_device_languages(hub_handle, device_port, buf=None):
    ' Get the languages supported by the device for strings '
    buf = alloc_descriptor_buf(buf)

    def initbuf(b):
        # Fill in the setup packet for a GET_DESCRIPTOR(string, index 0)
        # control request, which returns the supported language ids
        p = cast(b, PUSB_DESCRIPTOR_REQUEST).contents
        p.ConnectionIndex = device_port
        sp = p.SetupPacket
        sp.bmRequest, sp.bRequest = 0x80, USB_REQUEST_GET_DESCRIPTOR
        sp.wValue[1] = USB_STRING_DESCRIPTOR_TYPE
        sp.wLength = MAXIMUM_USB_STRING_LENGTH + 2

    buf, bytes_returned = device_io_control(
        hub_handle, IOCTL_USB_GET_DESCRIPTOR_FROM_NODE_CONNECTION, buf, buf, initbuf)
    data = cast(buf, PUSB_DESCRIPTOR_REQUEST).contents.Data
    sz, dtype = data.bLength, data.bType
    # 0x03 is the USB string descriptor type
    if dtype != 0x03:
        raise WindowsError('Invalid datatype for string descriptor: 0x%x' % dtype)
    # The payload is an array of 16-bit language ids; zero entries are
    # filtered out as padding
    data = cast(data.String, POINTER(USHORT * (sz // 2)))
    return buf, list(filter(None, data.contents))
def __init__(
    self,
    items,
    level1=DEFAULT_LEVEL1,
    level2=DEFAULT_LEVEL2,
    level3=DEFAULT_LEVEL3,
    scorer=None
):
    # Prepare a matcher over items: start the shared worker pool on first
    # use and partition the normalized items among the workers, creating
    # one scorer per partition.
    # NOTE(review): level1/level2/level3 are not referenced in this body —
    # confirm whether the chosen scorer is expected to consume them.
    with wlock:
        if not workers:
            requests, results = Queue(), Queue()
            w = [Worker(requests, results) for i in range(max(1, cpu_count()))]
            [x.start() for x in w]
            workers.extend(w)
    # Normalize to NFC and drop falsy entries
    items = map(lambda x: normalize('NFC', unicode_type(x)), filter(None, items))
    self.items = items = tuple(items)
    tasks = split(items, len(workers))
    # task_maps translate per-task indices back to global item indices
    self.task_maps = [{j: i for j, (i, _) in enumerate(task)} for task in tasks]
    scorer = scorer or default_scorer
    self.scorers = [
        scorer(tuple(map(itemgetter(1), task_items))) for task_items in tasks
    ]
    self.sort_keys = None
def get_overrides(self, r, c, num_of_rows, num_of_cols_in_row):
    'List of possible overrides for the given para'
    # Builds the list of conditional table style override names (cnfStyle
    # conditions) that apply to the cell at row r, column c. c may be None
    # when only row based overrides apply.
    overrides = ['wholeTable']

    def divisor(m, n):
        # Which band (group of n rows/columns) position m falls in
        return (m - (m % n)) // n
    if c is not None:
        odd_column_band = (divisor(c, self.table_style.col_band_size) % 2) == 1
        overrides.append('band%dVert' % (1 if odd_column_band else 2))
    odd_row_band = (divisor(r, self.table_style.row_band_size) % 2) == 1
    overrides.append('band%dHorz' % (1 if odd_row_band else 2))

    # According to the OOXML spec columns should have higher override
    # priority than rows, but Word seems to do it the other way around.
    if c is not None:
        if c == 0:
            overrides.append('firstCol')
        if c >= num_of_cols_in_row - 1:
            overrides.append('lastCol')
    if r == 0:
        overrides.append('firstRow')
    if r >= num_of_rows - 1:
        overrides.append('lastRow')
    if c is not None:
        # Corner cells of the table
        if r == 0:
            if c == 0:
                overrides.append('nwCell')
            if c == num_of_cols_in_row - 1:
                overrides.append('neCell')
        if r == num_of_rows - 1:
            if c == 0:
                overrides.append('swCell')
            if c == num_of_cols_in_row - 1:
                overrides.append('seCell')
    return tuple(filter(self.override_allowed, overrides))
def load_icon_resource_as_pixmap(icon_resource, size=ICON_SIZE):
    # Load the icon described by a 'path-to-module,icon-index' resource
    # string as a QPixmap of roughly size x size pixels.
    if not icon_resource:
        return
    parts = tuple(filter(None, re.split(r',([-0-9]+$)', icon_resource)))
    if len(parts) != 2:
        return
    module, index = parts
    index = int(index)
    if module.startswith('"') and module.endswith('"'):
        # Quoted path, possibly followed by arguments: keep only the command
        module = split_commandline(module)[0]
    hmodule = winutil.load_library(module, winutil.LOAD_LIBRARY_AS_DATAFILE | winutil.LOAD_LIBRARY_AS_IMAGE_RESOURCE)
    icons = winutil.load_icons(hmodule, index)
    pixmaps = []
    must_use_qt()
    for icon_data, icon_handle in icons:
        pixmap = QPixmap()
        pixmap.loadFromData(icon_data)
        if pixmap.isNull() and bool(icon_handle):
            # Fall back to converting the HICON when the raw data fails
            pixmap = hicon_to_pixmap(icon_handle)
        if pixmap.isNull():
            continue
        pixmaps.append(pixmap)
    if not pixmaps:
        return

    def area(p):
        return p.width() * p.height()

    pixmaps.sort(key=area)
    q = size * size
    # Pick the smallest icon at least as big as requested, scaling down
    # when it is strictly bigger
    for pmap in pixmaps:
        if area(pmap) >= q:
            if area(pmap) == q:
                return pmap
            return pmap.scaled(size, size, aspectRatioMode=Qt.KeepAspectRatio, transformMode=Qt.SmoothTransformation)
    # All icons are smaller than requested: scale up the biggest one
    return pixmaps[-1].scaled(size, size, aspectRatioMode=Qt.KeepAspectRatio, transformMode=Qt.SmoothTransformation)
def find_identical_books(mi, data):
    # Return the ids of books in data matching mi on all authors, on fuzzy
    # title and, if mi specifies languages, on languages as well.
    author_map, aid_map, title_map, lang_map = data
    found_books = None
    for a in mi.authors:
        author_ids = author_map.get(icu_lower(a))
        if author_ids is None:
            # Unknown author: no book can match
            return set()
        books_by_author = {
            book_id for aid in author_ids for book_id in aid_map.get(aid, ())
        }
        # Intersect the candidate sets across all authors
        if found_books is None:
            found_books = books_by_author
        else:
            found_books &= books_by_author
        if not found_books:
            return set()
    ans = set()
    titleq = fuzzy_title(mi.title)
    for book_id in found_books:
        title = title_map.get(book_id, '')
        if fuzzy_title(title) == titleq:
            ans.add(book_id)

    # Language filter: 'und' (undetermined) entries are ignored
    langq = tuple(
        filter(lambda x: x and x != 'und',
               map(canonicalize_lang, mi.languages or ())))
    if not langq:
        return ans

    def lang_matches(book_id):
        # Books with no stored languages match any language query
        book_langq = lang_map.get(book_id)
        return not book_langq or langq == book_langq

    return {book_id for book_id in ans if lang_matches(book_id)}
def sony_metadata(oeb):
    # Generate the Sony periodical metadata document and atom feed for the
    # given OEB book. Returns (metadata, atom) where atom is utf-8 encoded.
    m = oeb.metadata
    title = short_title = unicode_type(m.title[0])
    publisher = __appname__ + ' ' + __version__
    try:
        # publication_type has the form <kind>:<type>:<short title>
        pt = unicode_type(oeb.metadata.publication_type[0])
        short_title = ':'.join(pt.split(':')[2:])
    except:
        pass

    try:
        date = parse_date(unicode_type(m.date[0]),
                as_utc=False).strftime('%Y-%m-%d')
    except:
        date = strftime('%Y-%m-%d')
    try:
        language = unicode_type(m.language[0]).replace('_', '-')
    except:
        language = 'en'
    short_title = xml(short_title, True)

    metadata = SONY_METADATA.format(title=xml(title),
            short_title=short_title,
            publisher=xml(publisher), issue_date=xml(date),
            language=xml(language))

    updated = strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())

    def cal_id(x):
        # Identify the identifier element carrying the calibre uuid
        for k, v in x.attrib.items():
            if k.endswith('scheme') and v == 'uuid':
                return True

    try:
        base_id = unicode_type(list(filter(cal_id, m.identifier))[0])
    except:
        base_id = unicode_type(uuid4())

    toc = oeb.toc

    if False and toc.depth() < 3:
        # Single section periodical
        # Disabled since I prefer the current behavior
        from calibre.ebooks.oeb.base import TOC
        section = TOC(klass='section', title=_('All articles'),
                    href=oeb.spine[2].href)
        for x in toc:
            section.nodes.append(x)
        toc = TOC(klass='periodical', href=oeb.spine[2].href,
                    title=unicode_type(oeb.metadata.title[0]))
        toc.nodes.append(section)

    entries = []
    seen_titles = set()
    for i, section in enumerate(toc):
        if not section.href:
            continue
        secid = 'section%d'%i
        sectitle = section.title
        if not sectitle:
            sectitle = _('Unknown')
        d = 1
        bsectitle = sectitle
        # De-duplicate section titles by appending a numeric suffix
        while sectitle in seen_titles:
            sectitle = bsectitle + ' ' + unicode_type(d)
            d += 1
        seen_titles.add(sectitle)
        sectitle = xml(sectitle, True)
        secdesc = section.description
        if not secdesc:
            secdesc = ''
        secdesc = xml(secdesc)
        entries.append(SONY_ATOM_SECTION.format(title=sectitle,
            href=section.href, id=xml(base_id)+'/'+secid,
            short_title=short_title, desc=secdesc, updated=updated))

        for j, article in enumerate(section):
            if not article.href:
                continue
            atitle = article.title
            btitle = atitle
            d = 1
            # De-duplicate article titles against section titles.
            # NOTE(review): atitle is never added to seen_titles, so
            # duplicate article titles are not tracked among themselves —
            # confirm this is intended.
            while atitle in seen_titles:
                atitle = btitle + ' ' + unicode_type(d)
                d += 1
            auth = article.author if article.author else ''
            desc = section.description
            if not desc:
                desc = ''
            aid = 'article%d'%j

            entries.append(SONY_ATOM_ENTRY.format(
                title=xml(atitle),
                author=xml(auth),
                updated=updated,
                desc=desc,
                short_title=short_title,
                section_title=sectitle,
                href=article.href,
                word_count=unicode_type(1),
                id=xml(base_id)+'/'+secid+'/'+aid
                ))

    atom = SONY_ATOM.format(short_title=short_title,
            entries='\n\n'.join(entries), updated=updated,
            id=xml(base_id)).encode('utf-8')

    return metadata, atom
def build_index(rd, books, num, search, sort, order, start, total, url_base, field_metadata, ctx, library_map, library_id):  # {{{
    """Build the lxml tree for the mobile book-listing page.

    Returns a complete ``<html>`` element: logo, search box, pagination,
    one table row per book (thumbnail + per-format download buttons +
    two lines of metadata), and a link back to the full interface.
    """
    logo = E.div(E.img(src=ctx.url_for('/static', what='calibre.png'),
        alt=__appname__), id='logo')
    search_box = build_search_box(num, search, sort, order, ctx, field_metadata, library_id)
    # Pagination is rendered twice, above and below the table
    navigation = build_navigation(start, num, total, url_base)
    navigation2 = build_navigation(start, num, total, url_base)
    if library_map:
        choose_library = build_choose_library(ctx, library_map)
    books_table = E.table(id='listing')

    body = E.body(logo, search_box, navigation, E.hr(class_='spacer'),
            books_table, E.hr(class_='spacer'), navigation2)

    for book in books:
        thumbnail = E.td(
                E.img(type='image/jpeg', border='0',
                    src=ctx.url_for('/get', what='thumb', book_id=book.id,
                        library_id=library_id),
                    class_='thumbnail'))

        data = E.td()
        # One download button per real format (ORIGINAL_* backups skipped)
        for fmt in book.formats or ():
            if not fmt or fmt.lower().startswith('original_'):
                continue
            s = E.span(
                E.a(
                    fmt.lower(),
                    href=ctx.url_for('/legacy/get', what=fmt,
                        book_id=book.id, library_id=library_id,
                        filename=book_filename(rd, book.id, book, fmt))
                ),
                class_='button')
            s.tail = u''
            data.append(s)

        div = E.div(class_='data-container')
        data.append(div)

        series = ('[%s - %s]' % (book.series, book.series_index)) if book.series else ''
        tags = ('Tags=[%s]' % ', '.join(book.tags)) if book.tags else ''

        # Append name=[value] pairs for every displayable ignorable field
        # (comments-type fields are excluded)
        ctext = ''
        for key in filter(ctx.is_field_displayable, field_metadata.ignorable_field_keys()):
            fm = field_metadata[key]
            if fm['datatype'] == 'comments':
                continue
            name, val = book.format_field(key)
            if val:
                ctext += '%s=[%s] ' % (name, val)

        first = E.span('%s %s by %s' % (book.title, series,
            authors_to_string(book.authors)), class_='first-line')
        div.append(first)
        ds = '' if is_date_undefined(book.timestamp) else strftime(
            '%d %b, %Y', t=dt_as_local(book.timestamp).timetuple())
        second = E.span('%s %s %s' % (ds, tags, ctext), class_='second-line')
        div.append(second)

        books_table.append(E.tr(thumbnail, data))

    if library_map:
        body.append(choose_library)

    body.append(
        E.div(
            E.a(_('Switch to the full interface (non-mobile interface)'),
                href=ctx.url_for(None),
                style="text-decoration: none; color: blue",
                title=_('The full interface gives you many more features, '
                        'but it may not work well on a small screen')),
            style="text-align:center"))
    return E.html(
        E.head(
            E.title(__appname__ + ' Library'),
            E.link(rel='icon', href=ctx.url_for('/favicon.png'), type='image/png'),
            E.link(rel='stylesheet', type='text/css', href=ctx.url_for('/static', what='mobile.css')),
            E.link(rel='apple-touch-icon', href=ctx.url_for("/static", what='calibre.png')),
            E.meta(name="robots", content="noindex")
        ),  # End head
        body
    )  # End html
def workaround_ade_quirks(self):  # {{{
    '''
    Perform various markup transforms to get the output to render correctly
    in the quirky ADE.

    Mutates self.oeb in place: rewrites TOC fragment hrefs, cleans up each
    spine document's markup, and adjusts the main stylesheet.
    '''
    from calibre.ebooks.oeb.base import XPath, XHTML, barename, urlunquote

    stylesheet = self.oeb.manifest.main_stylesheet

    # ADE cries big wet tears when it encounters an invalid fragment
    # identifier in the NCX toc.
    frag_pat = re.compile(r'[-A-Za-z0-9_:.]+$')
    for node in self.oeb.toc.iter():
        href = getattr(node, 'href', None)
        if hasattr(href, 'partition'):
            base, _, frag = href.partition('#')
            frag = urlunquote(frag)
            if frag and frag_pat.match(frag) is None:
                self.log.warn(
                        'Removing fragment identifier %r from TOC as Adobe Digital Editions cannot handle it' % frag)
                node.href = base

    for x in self.oeb.spine:
        root = x.data
        body = XPath('//h:body')(root)
        if body:
            body = body[0]

        if hasattr(body, 'xpath'):
            # remove <img> tags with empty src elements
            bad = []
            for x in XPath('//h:img')(body):
                src = x.get('src', '').strip()
                if src in ('', '#') or src.startswith('http:'):
                    bad.append(x)
            for img in bad:
                img.getparent().remove(img)

            # Add id attribute to <a> tags that have name
            for x in XPath('//h:a[@name]')(body):
                if not x.get('id', False):
                    x.set('id', x.get('name'))
                # The delightful epubcheck has started complaining about <a> tags that
                # have name attributes.
                x.attrib.pop('name')

            # Replace <br> that are children of <body> as ADE doesn't handle them
            for br in XPath('./h:br')(body):
                if br.getparent() is None:
                    continue
                try:
                    prior = next(br.itersiblings(preceding=True))
                    priortag = barename(prior.tag)
                    priortext = prior.tail
                except:
                    # No preceding sibling: treat the body itself as "prior"
                    priortag = 'body'
                    priortext = body.text
                if priortext:
                    priortext = priortext.strip()
                # Turn the <br> into a <p> containing a non-breaking space
                br.tag = XHTML('p')
                br.text = '\u00a0'
                style = br.get('style', '').split(';')
                style = list(filter(None, map(lambda x: x.strip(), style)))
                style.append('margin:0pt; border:0pt')
                # If the prior tag is a block (including a <br> we replaced)
                # then this <br> replacement should have a 1-line height.
                # Otherwise it should have no height.
                if not priortext and priortag in block_level_tags:
                    style.append('height:1em')
                else:
                    style.append('height:0pt')
                br.set('style', '; '.join(style))

        # ADE cannot handle embedded media objects
        for tag in XPath('//h:embed')(root):
            tag.getparent().remove(tag)
        for tag in XPath('//h:object')(root):
            if tag.get('type', '').lower().strip() in {
                    'image/svg+xml', 'application/svg+xml'}:
                continue
            tag.getparent().remove(tag)

        # Empty <title>/<style> elements confuse ADE
        for tag in XPath('//h:title|//h:style')(root):
            if not tag.text:
                tag.getparent().remove(tag)
        for tag in XPath('//h:script')(root):
            if (not tag.text and not tag.get('src', False) and
                    tag.get('type', None) != 'text/x-mathjax-config'):
                tag.getparent().remove(tag)
        for tag in XPath('//h:body/descendant::h:script')(root):
            tag.getparent().remove(tag)

        formchildren = XPath('./h:input|./h:button|./h:textarea|'
                './h:label|./h:fieldset|./h:legend')
        for tag in XPath('//h:form')(root):
            if formchildren(tag):
                tag.getparent().remove(tag)
            else:
                # Not a real form
                tag.tag = XHTML('div')

        for tag in XPath('//h:center')(root):
            tag.tag = XHTML('div')
            tag.set('style', 'text-align:center')

        # ADE can't handle & in an img url
        for tag in XPath('//h:img[@src]')(root):
            tag.set('src', tag.get('src', '').replace('&', ''))

        # ADE whimpers in fright when it encounters a <td> outside a
        # <table>
        in_table = XPath('ancestor::h:table')
        for tag in XPath('//h:td|//h:tr|//h:th')(root):
            if not in_table(tag):
                tag.tag = XHTML('div')

        # ADE fails to render non breaking hyphens/soft hyphens/zero width spaces
        special_chars = re.compile('[\u200b\u00ad]')
        for elem in root.iterdescendants('*'):
            if elem.text:
                elem.text = special_chars.sub('', elem.text)
                elem.text = elem.text.replace('\u2011', '-')
            if elem.tail:
                elem.tail = special_chars.sub('', elem.tail)
                elem.tail = elem.tail.replace('\u2011', '-')

        if stylesheet is not None:
            # ADE doesn't render lists correctly if they have left margins
            from css_parser.css import CSSRule
            for lb in XPath('//h:ul[@class]|//h:ol[@class]')(root):
                sel = '.' + lb.get('class')
                for rule in stylesheet.data.cssRules.rulesOfType(
                        CSSRule.STYLE_RULE):
                    if sel == rule.selectorList.selectorText:
                        rule.style.removeProperty('margin-left')
                        # padding-left breaks rendering in webkit and gecko
                        rule.style.removeProperty('padding-left')

            # Change whitespace:pre to pre-wrap to accommodate readers that
            # cannot scroll horizontally
            for rule in stylesheet.data.cssRules.rulesOfType(
                    CSSRule.STYLE_RULE):
                style = rule.style
                ws = style.getPropertyValue('white-space')
                if ws == 'pre':
                    style.setProperty('white-space', 'pre-wrap')
def _finished(self, *args):
    """Refresh the GUI's plugin-update count once loading is done."""
    if self.model:
        upgradeable = [
            plugin for plugin in self.model.display_plugins
            if filter_upgradeable_plugins(plugin)
        ]
        self.gui.recalc_update_label(len(upgradeable))
def edit_tags(self):
    """Open the tag editor seeded with the tags from the query box."""
    from calibre.gui2.dialogs.tag_editor import TagEditor
    current = [t.strip() for t in self.query.text().split(',') if t.strip()]
    dlg = TagEditor(self, get_gui().current_db, current_tags=current)
    if dlg.exec_() == dlg.Accepted:
        self.query.setText(', '.join(dlg.tags))
def py_clean_xml_chars(unicode_string):
    """Pure-Python fallback: keep only characters accepted by allowed()."""
    return u''.join(ch for ch in unicode_string if allowed(ch))
def change_restriction(username):
    # Interactively edit the per-library access restrictions for *username*.
    # NOTE(review): this is a closure — it relies on names from the
    # enclosing CLI scope (m, choice, get_input, prints).
    r = m.restrictions(username)
    if r is None:
        raise SystemExit('The user {} does not exist'.format(username))
    # Show the user's current allow/block lists and search restrictions
    if r['allowed_library_names']:
        libs = r['allowed_library_names']
        prints(
            ngettext(
                '{} is currently only allowed to access the library named: {}',
                '{} is currently only allowed to access the libraries named: {}',
                len(libs)).format(username, ', '.join(libs)))
    if r['blocked_library_names']:
        libs = r['blocked_library_names']
        prints(
            ngettext(
                '{} is currently not allowed to access the library named: {}',
                '{} is currently not allowed to access the libraries named: {}',
                len(libs)).format(username, ', '.join(libs)))
    if r['library_restrictions']:
        prints(
            _('{} has the following additional per-library restrictions:').
            format(username))
        for k, v in iteritems(r['library_restrictions']):
            prints(k + ':', v)
    else:
        prints(
            _('{} has no additional per-library restrictions').format(
                username))
    c = choice(choices=[
        _('Allow access to all libraries'),
        _('Allow access to only specified libraries'),
        _('Allow access to all, except specified libraries'),
        _('Change per-library restrictions'),
        _('Cancel')
    ])
    if c == 0:
        # Clear all restrictions
        m.update_user_restrictions(username, {})
    elif c == 3:
        # Edit per-library search-expression restrictions, one at a time
        while True:
            library = get_input(_('Enter the name of the library:'))
            if not library:
                break
            prints(
                _('Enter a search expression, access will be granted only to books matching this expression.'
                  ' An empty expression will grant access to all books.'))
            plr = get_input(_('Search expression:'))
            if plr:
                r['library_restrictions'][library] = plr
            else:
                # Empty expression removes the restriction for this library
                r['library_restrictions'].pop(library, None)
            m.update_user_restrictions(username, r)
            if get_input(_('Another restriction?') + ' (y/n):') != 'y':
                break
    elif c == 4:
        # Cancel: leave restrictions unchanged
        pass
    else:
        # c is 1 (allow-only) or 2 (block-only): replace the name list
        names = get_input(
            _('Enter a comma separated list of library names:'))
        names = list(filter(None, [x.strip() for x in names.split(',')]))
        w = 'allowed_library_names' if c == 1 else 'blocked_library_names'
        t = _('Allowing access only to libraries: {}') if c == 1 else _(
            'Allowing access to all libraries, except: {}')
        prints(t.format(', '.join(names)))
        m.update_user_restrictions(username, {w: names})
def run(self, path_to_output, opts, db, notification=DummyReporter()):
    """Generate a BibTeX catalog of the library into *path_to_output*.

    Validates the CLI/GUI options (encoding, error handler, entry type,
    citation and file-path flags), gathers the requested entries from *db*
    and writes one @book/@misc BibTeX record per entry.
    """
    from calibre.utils.date import isoformat
    from calibre.utils.html2text import html2text
    from calibre.utils.bibtex import BibTeX
    from calibre.library.save_to_disk import preprocess_template
    from calibre.utils.logging import default_log as log
    from calibre.utils.filenames import ascii_text

    library_name = os.path.basename(db.library_path)

    def create_bibtex_entry(entry, fields, mode, template_citation,
                            bibtexdict, db, citation_bibtex=True,
                            calibre_files=True):
        # Render a single database *entry* as a BibTeX record string.
        # Bibtex doesn't like UTF-8 but keep unicode until writing
        # Define starting chain or if book valid strict and not book return a Fail string
        bibtex_entry = []
        if mode != "misc" and check_entry_book_valid(entry):
            bibtex_entry.append('@book{')
        elif mode != "book":
            bibtex_entry.append('@misc{')
        else:
            # case strict book
            return ''

        if citation_bibtex:
            # Citation tag
            bibtex_entry.append(
                make_bibtex_citation(entry, template_citation, bibtexdict))
            bibtex_entry = [' '.join(bibtex_entry)]

        for field in fields:
            # Resolve the value for this output field
            if field.startswith('#'):
                item = db.get_field(entry['id'], field, index_is_id=True)
                if isinstance(item, (bool, numbers.Number)):
                    item = repr(item)
            elif field == 'title_sort':
                item = entry['sort']
            elif field == 'library_name':
                item = library_name
            else:
                item = entry[field]

            # check if the field should be included (none or empty)
            if item is None:
                continue
            try:
                if len(item) == 0:
                    continue
            except TypeError:
                # Not a sized value (e.g. a number): keep it
                pass

            if field == 'authors':
                bibtex_entry.append('author = "%s"' %
                                    bibtexdict.bibtex_author_format(item))
            elif field == 'id':
                bibtex_entry.append('calibreid = "%s"' % int(item))
            elif field == 'rating':
                bibtex_entry.append('rating = "%s"' % int(item))
            elif field == 'size':
                bibtex_entry.append('%s = "%s octets"' % (field, int(item)))
            elif field == 'tags':
                # A list to flatten
                bibtex_entry.append(
                    'tags = "%s"' % bibtexdict.utf8ToBibtex(', '.join(item)))
            elif field == 'comments':
                # \n removal
                item = item.replace('\r\n', ' ')
                item = item.replace('\n', ' ')
                # unmatched brace removal (users should use \leftbrace or \rightbrace for single braces)
                item = bibtexdict.stripUnmatchedSyntax(item, '{', '}')
                # html to text
                try:
                    item = html2text(item)
                except:
                    log.warn("Failed to convert comments to text")
                bibtex_entry.append('note = "%s"' %
                                    bibtexdict.utf8ToBibtex(item))
            elif field == 'isbn':
                # Could be 9, 10 or 13 digits
                bibtex_entry.append('isbn = "%s"' % format_isbn(item))
            elif field == 'formats':
                # Add file path if format is selected
                formats = [
                    format.rpartition('.')[2].lower() for format in item
                ]
                bibtex_entry.append('formats = "%s"' % ', '.join(formats))
                if calibre_files:
                    files = [
                        ':%s:%s' % (format, format.rpartition('.')[2].upper())
                        for format in item
                    ]
                    bibtex_entry.append('file = "%s"' % ', '.join(files))
            elif field == 'series_index':
                bibtex_entry.append('volume = "%s"' % int(item))
            elif field == 'timestamp':
                bibtex_entry.append('timestamp = "%s"' %
                                    isoformat(item).partition('T')[0])
            elif field == 'pubdate':
                bibtex_entry.append('year = "%s"' % item.year)
                bibtex_entry.append(
                    'month = "%s"' %
                    bibtexdict.utf8ToBibtex(strftime("%b", item)))
            elif field.startswith('#') and isinstance(
                    item, string_or_bytes):
                bibtex_entry.append(
                    'custom_%s = "%s"' %
                    (field[1:], bibtexdict.utf8ToBibtex(item)))
            elif isinstance(item, string_or_bytes):
                # elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
                #     'author_sort', 'series', 'title_sort'] :
                bibtex_entry.append('%s = "%s"' %
                                    (field, bibtexdict.utf8ToBibtex(item)))

        bibtex_entry = ',\n '.join(bibtex_entry)
        bibtex_entry += ' }\n\n'

        return bibtex_entry

    def check_entry_book_valid(entry):
        # Check that the required fields are ok for a book entry
        for field in ['title', 'authors', 'publisher']:
            if entry[field] is None or len(entry[field]) == 0:
                return False
        if entry['pubdate'] is None:
            return False
        else:
            return True

    def make_bibtex_citation(entry, template_citation, bibtexclass):
        # Build the citation key, from the user template when possible,
        # else from the ISBN digits or the calibre id.

        # define a function to replace the template entry by its value
        def tpl_replace(objtplname):
            tpl_field = re.sub(r'[\{\}]', '', objtplname.group())
            if tpl_field in TEMPLATE_ALLOWED_FIELDS:
                if tpl_field in ['pubdate', 'timestamp']:
                    tpl_field = isoformat(
                        entry[tpl_field]).partition('T')[0]
                elif tpl_field in ['tags', 'authors']:
                    tpl_field = entry[tpl_field][0]
                elif tpl_field in ['id', 'series_index']:
                    tpl_field = unicode_type(entry[tpl_field])
                else:
                    tpl_field = entry[tpl_field]
                return ascii_text(tpl_field)
            else:
                return ''

        if len(template_citation) > 0:
            tpl_citation = bibtexclass.utf8ToBibtex(
                bibtexclass.ValidateCitationKey(
                    re.sub(r'\{[^{}]*\}', tpl_replace, template_citation)))

            if len(tpl_citation) > 0:
                return tpl_citation

        if len(entry["isbn"]) > 0:
            template_citation = '%s' % re.sub(r'[\D]', '', entry["isbn"])
        else:
            template_citation = '%s' % unicode_type(entry["id"])

        return bibtexclass.ValidateCitationKey(template_citation)

    self.fmt = path_to_output.rpartition('.')[2]
    self.notification = notification

    # Combobox options
    bibfile_enc = ['utf8', 'cp1252', 'ascii']
    bibfile_enctag = ['strict', 'replace', 'ignore', 'backslashreplace']
    bib_entry = ['mixed', 'misc', 'book']

    # Needed because the widget returns an int index while the CLI
    # returns the option string itself
    try:
        bibfile_enc = bibfile_enc[opts.bibfile_enc]
        bibfile_enctag = bibfile_enctag[opts.bibfile_enctag]
        bib_entry = bib_entry[opts.bib_entry]
    except:
        if opts.bibfile_enc in bibfile_enc:
            bibfile_enc = opts.bibfile_enc
        else:
            log.warn("Incorrect --choose-encoding flag, revert to default")
            bibfile_enc = bibfile_enc[0]
        if opts.bibfile_enctag in bibfile_enctag:
            bibfile_enctag = opts.bibfile_enctag
        else:
            log.warn(
                "Incorrect --choose-encoding-configuration flag, revert to default"
            )
            bibfile_enctag = bibfile_enctag[0]
        if opts.bib_entry in bib_entry:
            bib_entry = opts.bib_entry
        else:
            log.warn("Incorrect --entry-type flag, revert to default")
            bib_entry = bib_entry[0]

    if opts.verbose:
        opts_dict = vars(opts)
        log("%s(): Generating %s" % (self.name, self.fmt))
        if opts.connected_device['is_device_connected']:
            log(" connected_device: %s" % opts.connected_device['name'])
        if opts_dict['search_text']:
            log(" --search='%s'" % opts_dict['search_text'])

        if opts_dict['ids']:
            log(" Book count: %d" % len(opts_dict['ids']))
            if opts_dict['search_text']:
                log(" (--search ignored when a subset of the database is specified)"
                    )

        if opts_dict['fields']:
            if opts_dict['fields'] == 'all':
                log(" Fields: %s" % ', '.join(FIELDS[1:]))
            else:
                log(" Fields: %s" % opts_dict['fields'])

        log(" Output file will be encoded in %s with %s flag" %
            (bibfile_enc, bibfile_enctag))

        log(" BibTeX entry type is %s with a citation like '%s' flag" %
            (bib_entry, opts_dict['bib_cit']))

    # If a list of ids are provided, don't use search_text
    if opts.ids:
        opts.search_text = None

    data = self.search_sort_db(db, opts)

    if not len(data):
        log.error(
            "\nNo matching database entries for search criteria '%s'" %
            opts.search_text)

    # Get the requested output fields as a list
    fields = self.get_output_fields(db, opts)

    if not len(data):
        log.error(
            "\nNo matching database entries for search criteria '%s'" %
            opts.search_text)

    # Initialize BibTeX class
    bibtexc = BibTeX()

    # Entries writing after Bibtex formating (or not)
    if bibfile_enc != 'ascii':
        bibtexc.ascii_bibtex = False
    else:
        bibtexc.ascii_bibtex = True

    # Check citation choice and go to default in case of bad CLI
    if isinstance(opts.impcit, string_or_bytes):
        if opts.impcit == 'False':
            citation_bibtex = False
        elif opts.impcit == 'True':
            citation_bibtex = True
        else:
            log.warn("Incorrect --create-citation, revert to default")
            citation_bibtex = True
    else:
        citation_bibtex = opts.impcit

    # Check add file entry and go to default in case of bad CLI
    if isinstance(opts.addfiles, string_or_bytes):
        if opts.addfiles == 'False':
            addfiles_bibtex = False
        elif opts.addfiles == 'True':
            addfiles_bibtex = True
        else:
            log.warn("Incorrect --add-files-path, revert to default")
            addfiles_bibtex = True
    else:
        addfiles_bibtex = opts.addfiles

    # Preprocess for error and light correction
    template_citation = preprocess_template(opts.bib_cit)

    # Open output and write entries
    with codecs.open(path_to_output, 'w', bibfile_enc, bibfile_enctag)\
        as outfile:
        # File header
        nb_entries = len(data)

        # check in book strict if all is ok else throw a warning into log
        if bib_entry == 'book':
            nb_books = len(list(filter(check_entry_book_valid, data)))
            if nb_books < nb_entries:
                log.warn("Only %d entries in %d are book compatible" %
                         (nb_books, nb_entries))
                nb_entries = nb_books

        # If connected device, add 'On Device' values to data
        if opts.connected_device[
                'is_device_connected'] and 'ondevice' in fields:
            for entry in data:
                entry[
                    'ondevice'] = db.catalog_plugin_on_device_temp_mapping[
                        entry['id']]['ondevice']

        # outfile.write('%%%Calibre catalog\n%%%{0} entries in catalog\n\n'.format(nb_entries))
        outfile.write(
            '@preamble{"This catalog of %d entries was generated by calibre on %s"}\n\n'
            % (nb_entries, strftime("%A, %d. %B %Y %H:%M")))

        for entry in data:
            outfile.write(
                create_bibtex_entry(entry, fields, bib_entry,
                                    template_citation, bibtexc, db,
                                    citation_bibtex, addfiles_bibtex))
def metadata_from_xmp_packet(raw_bytes):
    """Parse an XMP metadata packet into a calibre Metadata object.

    Reads Dublin Core, XMP, PDF and calibre-namespace properties from the
    packet in *raw_bytes*. Raises ValueError for the known-corrupted
    packets produced by Nitro PDF.
    """
    root = parse_xmp_packet(raw_bytes)
    mi = Metadata(_('Unknown'))
    title = first_alt('//dc:title', root)
    if title:
        if title.startswith(r'\376\377'):
            # corrupted XMP packet generated by Nitro PDF. See
            # https://bugs.launchpad.net/calibre/+bug/1541981
            raise ValueError(
                'Corrupted XMP metadata packet detected, probably generated by Nitro PDF'
            )
        mi.title = title
    authors = multiple_sequences('//dc:creator', root)
    if authors:
        mi.authors = authors
    tags = multiple_sequences('//dc:subject', root) or multiple_sequences(
        '//pdf:Keywords', root)
    if tags:
        mi.tags = tags
    comments = first_alt('//dc:description', root)
    if comments:
        mi.comments = comments
    publishers = multiple_sequences('//dc:publisher', root)
    if publishers:
        mi.publisher = publishers[0]
    try:
        pubdate = parse_date(first_sequence('//dc:date', root) or
                             first_simple('//xmp:CreateDate', root),
                             assume_utc=False)
    except:
        pass
    else:
        mi.pubdate = pubdate
    bkp = first_simple('//xmp:CreatorTool', root)
    if bkp:
        mi.book_producer = bkp
    # metadata date: the more recent of MetadataDate and ModifyDate
    md = safe_parse_date(first_simple('//xmp:MetadataDate', root))
    mod = safe_parse_date(first_simple('//xmp:ModifyDate', root))
    fd = more_recent(md, mod)
    if fd is not None:
        mi.metadata_date = fd
    rating = first_simple('//calibre:rating', root)
    if rating is not None:
        try:
            rating = float(rating)
            if 0 <= rating <= 10:
                mi.rating = rating
        except (ValueError, TypeError):
            pass
    series, series_index = read_series(root)
    if series:
        mi.series, mi.series_index = series, series_index
    for x in ('title_sort', 'author_sort'):
        for elem in XPath('//calibre:' + x)(root):
            val = read_simple_property(elem)
            if val:
                setattr(mi, x, val)
                break
    for x in ('author_link_map', 'user_categories'):
        # These are stored as JSON strings; ignore malformed data
        val = first_simple('//calibre:' + x, root)
        if val:
            try:
                setattr(mi, x, json.loads(val))
            except:
                pass

    languages = multiple_sequences('//dc:language', root)
    if languages:
        languages = list(filter(None, map(canonicalize_lang, languages)))
        if languages:
            mi.languages = languages

    identifiers = {}
    for xmpid in XPath('//xmp:Identifier')(root):
        for scheme, value in read_xmp_identifers(xmpid):
            if scheme and value:
                identifiers[scheme.lower()] = value

    # Fall back to prism/pdfx namespaced identifiers, validating
    # ISBN/DOI values before accepting them
    for namespace in ('prism', 'pdfx'):
        for scheme in KNOWN_ID_SCHEMES:
            if scheme not in identifiers:
                val = first_simple('//%s:%s' % (namespace, scheme), root)
                scheme = scheme.lower()
                if scheme == 'isbn':
                    val = check_isbn(val)
                elif scheme == 'doi':
                    val = check_doi(val)
                if val:
                    identifiers[scheme] = val

    # Check Dublin Core for recognizable identifier types
    for scheme, check_func in iteritems({
            'doi': check_doi,
            'isbn': check_isbn
    }):
        if scheme not in identifiers:
            val = check_func(first_simple('//dc:identifier', root))
            if val:
                # BUGFIX: was identifiers['doi'] = val, which stored a
                # validated ISBN under the 'doi' key
                identifiers[scheme] = val
    if identifiers:
        mi.set_identifiers(identifiers)

    read_user_metadata(mi, root)

    return mi
def read_tags(root, prefixes, refines):
    """Collect dc:subject tags, split on commas, whitespace-normalized,
    with empties dropped and order-preserving de-duplication."""
    collected = []
    for subject in XPath('./opf:metadata/dc:subject')(root):
        text = subject.text
        if text:
            collected.extend(
                normalize_whitespace(part) for part in text.split(','))
    return uniq([tag for tag in collected if tag])
def metadata_to_xmp_packet(mi):
    """Serialize the Metadata object *mi* into an XMP packet (bytes).

    Builds an x:xmpmeta/rdf:RDF tree with separate rdf:Description blocks
    for the dc, xmp, prism/pdfx and calibre namespaces, then serializes it.
    """
    A = ElementMaker(namespace=NS_MAP['x'], nsmap=nsmap('x'))
    R = ElementMaker(namespace=NS_MAP['rdf'], nsmap=nsmap('rdf'))
    root = A.xmpmeta(R.RDF)
    rdf = root[0]
    # Dublin Core block
    dc = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap('dc'))
    dc.set(expand('rdf:about'), '')
    rdf.append(dc)
    for prop, tag in iteritems({
            'title': 'dc:title',
            'comments': 'dc:description'
    }):
        val = mi.get(prop) or ''
        create_alt_property(dc, tag, val)
    for prop, (tag, ordered) in iteritems({
            'authors': ('dc:creator', True),
            'tags': ('dc:subject', False),
            'publisher': ('dc:publisher', False),
    }):
        val = mi.get(prop) or ()
        if isinstance(val, string_or_bytes):
            val = [val]
        create_sequence_property(dc, tag, val, ordered)
    if not mi.is_null('pubdate'):
        create_sequence_property(dc, 'dc:date',
                                 [isoformat(mi.pubdate, as_utc=False)
                                  ])  # Adobe spec recommends local time
    if not mi.is_null('languages'):
        # Prefer two-letter ISO 639-1 codes, falling back to canonical names
        langs = list(
            filter(
                None,
                map(lambda x: lang_as_iso639_1(x) or canonicalize_lang(x),
                    mi.languages)))
        if langs:
            create_sequence_property(dc, 'dc:language', langs, ordered=False)

    # XMP namespace block
    xmp = rdf.makeelement(expand('rdf:Description'),
                          nsmap=nsmap('xmp', 'xmpidq'))
    xmp.set(expand('rdf:about'), '')
    rdf.append(xmp)
    extra_ids = {}
    # Extra identifier blocks for the prism and pdfx namespaces
    for x in ('prism', 'pdfx'):
        p = extra_ids[x] = rdf.makeelement(expand('rdf:Description'),
                                           nsmap=nsmap(x))
        p.set(expand('rdf:about'), '')
        rdf.append(p)

    identifiers = mi.get_identifiers()
    if identifiers:
        create_identifiers(xmp, identifiers)
        # Mirror ISBN/DOI into the prism and pdfx blocks as well
        for scheme, val in iteritems(identifiers):
            if scheme in {'isbn', 'doi'}:
                for prefix, parent in iteritems(extra_ids):
                    ie = parent.makeelement(expand('%s:%s' %
                                                   (prefix, scheme)))
                    ie.text = val
                    parent.append(ie)

    d = xmp.makeelement(expand('xmp:MetadataDate'))
    d.text = isoformat(now(), as_utc=False)
    xmp.append(d)

    # calibre namespace block
    calibre = rdf.makeelement(expand('rdf:Description'),
                              nsmap=nsmap('calibre', 'calibreSI',
                                          'calibreCC'))
    calibre.set(expand('rdf:about'), '')
    rdf.append(calibre)
    if not mi.is_null('rating'):
        try:
            r = float(mi.rating)
        except (TypeError, ValueError):
            pass
        else:
            create_simple_property(calibre, 'calibre:rating', '%g' % r)
    if not mi.is_null('series'):
        create_series(calibre, mi.series, mi.series_index)
    if not mi.is_null('timestamp'):
        create_simple_property(calibre, 'calibre:timestamp',
                               isoformat(mi.timestamp, as_utc=False))
    for x in ('author_link_map', 'user_categories'):
        # Stored as JSON via dump_dict
        val = getattr(mi, x, None)
        if val:
            create_simple_property(calibre, 'calibre:' + x, dump_dict(val))

    for x in ('title_sort', 'author_sort'):
        if not mi.is_null(x):
            create_simple_property(calibre, 'calibre:' + x, getattr(mi, x))

    all_user_metadata = mi.get_all_user_metadata(True)
    if all_user_metadata:
        create_user_metadata(calibre, all_user_metadata)
    return serialize_xmp_packet(root)
def create_periodical_index(self):  # {{{
    """Build the periodical (3-level) MOBI index: one periodical entry,
    then sections, then articles, with offsets/lengths derived from the
    serializer and de-duplicated by offset.

    Returns the flattened list of PeriodicalIndexEntry objects:
    [periodical, sections..., articles...].
    """
    # BUGFIX: iter(...).next() is Python-2-only; use the next() builtin
    # (the rest of this file already uses next()).
    periodical_node = next(iter(self.oeb.toc))
    periodical_node_offset = self.serializer.body_start_offset
    periodical_node_size = (self.serializer.body_end_offset -
                            periodical_node_offset)

    normalized_sections = []

    id_offsets = self.serializer.id_offsets

    periodical = PeriodicalIndexEntry(periodical_node_offset,
                                      self.cncx[periodical_node.title],
                                      self.cncx[periodical_node.klass], 0)
    periodical.length = periodical_node_size
    periodical.first_child_index = 1
    periodical.image_index = self.masthead_offset

    seen_sec_offsets = set()
    seen_art_offsets = set()

    for sec in periodical_node:
        normalized_articles = []
        # Skip sections whose href/title/klass cannot be resolved
        try:
            offset = id_offsets[sec.href]
            label = self.cncx[sec.title]
            klass = self.cncx[sec.klass]
        except:
            continue
        if offset in seen_sec_offsets:
            continue

        seen_sec_offsets.add(offset)
        section = PeriodicalIndexEntry(offset, label, klass, 1)
        section.parent_index = 0

        for art in sec:
            try:
                offset = id_offsets[art.href]
                label = self.cncx[art.title]
                klass = self.cncx[art.klass]
            except:
                continue
            if offset in seen_art_offsets:
                continue
            seen_art_offsets.add(offset)
            article = PeriodicalIndexEntry(offset, label, klass, 2)
            normalized_articles.append(article)
            article.author_offset = self.cncx[art.author]
            article.desc_offset = self.cncx[art.description]
            if getattr(art, 'toc_thumbnail', None) is not None:
                try:
                    ii = self.serializer.images[art.toc_thumbnail] - 1
                    if ii > -1:
                        article.image_index = ii
                except KeyError:
                    pass  # Image not found in serializer

        if normalized_articles:
            normalized_articles.sort(key=lambda x: x.offset)
            normalized_sections.append((section, normalized_articles))

    normalized_sections.sort(key=lambda x: x[0].offset)

    # Set lengths
    for s, x in enumerate(normalized_sections):
        sec, normalized_articles = x
        try:
            sec.length = normalized_sections[s + 1][0].offset - sec.offset
        except:
            # Last section runs to the end of the body
            sec.length = self.serializer.body_end_offset - sec.offset
        for i, art in enumerate(normalized_articles):
            try:
                art.length = normalized_articles[i + 1].offset - art.offset
            except:
                # Last article runs to the end of its section
                art.length = sec.offset + sec.length - art.offset

    # Filter out zero-length articles and empty/zero-length sections
    for i, x in list(enumerate(normalized_sections)):
        sec, normalized_articles = x
        normalized_articles = list(
            filter(lambda x: x.length > 0, normalized_articles))
        normalized_sections[i] = (sec, normalized_articles)

    normalized_sections = list(
        filter(lambda x: x[0].length > 0 and x[1], normalized_sections))

    # Set indices (sections first, then all articles)
    i = 0
    for sec, articles in normalized_sections:
        i += 1
        sec.index = i
        sec.parent_index = 0

    for sec, articles in normalized_sections:
        for art in articles:
            i += 1
            art.index = i
            art.parent_index = sec.index

    for sec, normalized_articles in normalized_sections:
        sec.first_child_index = normalized_articles[0].index
        sec.last_child_index = normalized_articles[-1].index

    # Set lengths again to close up any gaps left by filtering
    for s, x in enumerate(normalized_sections):
        sec, articles = x
        try:
            next_offset = normalized_sections[s + 1][0].offset
        except:
            next_offset = self.serializer.body_end_offset
        sec.length = next_offset - sec.offset

        for a, art in enumerate(articles):
            try:
                next_offset = articles[a + 1].offset
            except:
                next_offset = sec.next_offset
            art.length = next_offset - art.offset

    # Sanity check: entries must tile their parent with no gaps
    for s, x in enumerate(normalized_sections):
        sec, articles = x
        try:
            next_sec = normalized_sections[s + 1][0]
        except:
            if (sec.length == 0 or
                    sec.next_offset != self.serializer.body_end_offset):
                raise ValueError('Invalid section layout')
        else:
            if next_sec.offset != sec.next_offset or sec.length == 0:
                raise ValueError('Invalid section layout')
        for a, art in enumerate(articles):
            try:
                next_art = articles[a + 1]
            except:
                if (art.length == 0 or
                        art.next_offset != sec.next_offset):
                    raise ValueError('Invalid article layout')
            else:
                if art.length == 0 or art.next_offset != next_art.offset:
                    raise ValueError('Invalid article layout')

    # Flatten
    indices = [periodical]
    for sec, articles in normalized_sections:
        indices.append(sec)
        periodical.last_child_index = sec.index
    for sec, articles in normalized_sections:
        for a in articles:
            indices.append(a)

    return indices
def mi_to_html(mi, field_list=None, default_author_link=None, use_roman_numbers=True, rating_font='Liberation Serif', rtl=False):
    '''
    Render the metadata of a book as HTML table rows.

    :param mi: a Metadata-like object; treated as a device book when it has
        no ``id`` attribute
    :param field_list: iterable of ``(field, display)`` pairs; defaults to
        ``get_field_list(mi)``
    :param default_author_link: template/URL scheme used for author links when
        the book has no per-author link
    :param use_roman_numbers: render series indices as roman numerals
    :param rating_font: font family used for the rating stars cell
    :param rtl: right-to-left layout
    :return: tuple of (HTML table markup, list of rendered comment-field divs)
    '''
    if field_list is None:
        field_list = get_field_list(mi)
    ans = []
    comment_fields = []
    # Device books have no database id
    isdevice = not hasattr(mi, 'id')
    row = u'<td class="title">%s</td><td class="value">%s</td>'
    p = prepare_string_for_xml
    a = partial(prepare_string_for_xml, attribute=True)
    book_id = getattr(mi, 'id', 0)
    for field in (field for field, display in field_list if display):
        try:
            metadata = mi.metadata_for_field(field)
        except:
            continue
        if not metadata:
            continue
        if field == 'sort':
            field = 'title_sort'
        if metadata['is_custom'] and metadata['datatype'] in {
                'bool', 'int', 'float'}:
            # For these datatypes False/0 are valid values, so only None is null
            isnull = mi.get(field) is None
        else:
            isnull = mi.is_null(field)
        if isnull:
            continue
        name = metadata['name']
        if not name:
            name = field
        name += ':'
        disp = metadata['display']
        if metadata['datatype'] == 'comments' or field == 'comments':
            # Long-text fields: rendered either inline (heading_position ==
            # 'side') or collected separately into comment_fields
            val = getattr(mi, field)
            if val:
                ctype = disp.get('interpret_as') or 'html'
                val = force_unicode(val)
                if ctype == 'long-text':
                    val = '<pre style="white-space:pre-wrap">%s</pre>' % p(val)
                elif ctype == 'short-text':
                    val = '<span>%s</span>' % p(val)
                elif ctype == 'markdown':
                    val = markdown(val)
                else:
                    val = comments_to_html(val)
                if disp.get('heading_position', 'hide') == 'side':
                    ans.append((field, row % (name, val)))
                else:
                    if disp.get('heading_position', 'hide') == 'above':
                        val = '<h3 class="comments-heading">%s</h3>%s' % (
                            p(name), val)
                    comment_fields.append(
                        '<div id="%s" class="comments">%s</div>' % (
                            field.replace('#', '_'), val))
        elif metadata['datatype'] == 'rating':
            val = getattr(mi, field)
            if val:
                star_string = rating_to_stars(
                    val, disp.get('allow_half_stars', False))
                ans.append(
                    (field,
                     u'<td class="title">%s</td><td class="rating value" '
                     'style=\'font-family:"%s"\'>%s</td>' % (
                         name, rating_font, star_string)))
        elif metadata['datatype'] == 'composite':
            val = getattr(mi, field)
            if val:
                val = force_unicode(val)
                if disp.get('contains_html', False):
                    ans.append((field, row % (name, comments_to_html(val))))
                else:
                    if not metadata['is_multiple']:
                        val = '<a href="%s" title="%s">%s</a>' % (
                            search_action(field, val),
                            _('Click to see books with {0}: {1}').format(
                                metadata['name'], a(val)), p(val))
                    else:
                        # Split on the UI separator and link each value
                        all_vals = [
                            v.strip() for v in val.split(
                                metadata['is_multiple']['list_to_ui'])
                            if v.strip()
                        ]
                        links = [
                            '<a href="%s" title="%s">%s</a>' %
                            (search_action(field, x),
                             _('Click to see books with {0}: {1}').format(
                                 metadata['name'], a(x)), p(x))
                            for x in all_vals
                        ]
                        val = metadata['is_multiple']['list_to_ui'].join(links)
                    ans.append((field, row % (name, val)))
        elif field == 'path':
            if mi.path:
                path = force_unicode(mi.path, filesystem_encoding)
                scheme = u'devpath' if isdevice else u'path'
                loc = path if isdevice else book_id
                pathstr = _('Click to open')
                extra = ''
                if isdevice:
                    durl = path
                    # MTP device paths look like mtp:::<id>:::<path>; show
                    # only the path part
                    if durl.startswith('mtp:::'):
                        durl = ':::'.join((durl.split(':::'))[2:])
                    extra = '<br><span style="font-size:smaller">%s</span>' % (
                        prepare_string_for_xml(durl))
                link = '<a href="%s" title="%s">%s</a>%s' % (action(
                    scheme, loc=loc), prepare_string_for_xml(
                        path, True), pathstr, extra)
                ans.append((field, row % (name, link)))
        elif field == 'formats':
            if isdevice:
                continue
            path = mi.path or ''
            bpath = ''
            if path:
                h, t = os.path.split(path)
                bpath = os.sep.join((os.path.basename(h), t))
            data = ({
                'fmt': x,
                'path': a(path or ''),
                'fname': a(mi.format_files.get(x, '')),
                'ext': x.lower(),
                'id': book_id,
                'bpath': bpath,
                'sep': os.sep,
                'action': action('format', book_id=book_id, fmt=x,
                                 path=path or '',
                                 fname=mi.format_files.get(x, ''))
            } for x in mi.formats)
            fmts = [
                '<a title="{bpath}{sep}{fname}.{ext}" href="{action}">{fmt}</a>'
                .format(**x) for x in data
            ]
            ans.append((field, row % (name, ', '.join(fmts))))
        elif field == 'identifiers':
            urls = urls_from_identifiers(mi.identifiers)
            links = [
                '<a href="%s" title="%s:%s">%s</a>' %
                (action('identifier', url=url, name=namel, id_type=id_typ,
                        value=id_val, field='identifiers', book_id=book_id),
                 a(id_typ), a(id_val), p(namel))
                for namel, id_typ, id_val, url in urls
            ]
            links = u', '.join(links)
            if links:
                ans.append((field, row % (_('Ids') + ':', links)))
        elif field == 'authors':
            authors = []
            formatter = EvalFormatter()
            for aut in mi.authors:
                link = ''
                # Per-author link takes precedence over default_author_link
                if mi.author_link_map.get(aut):
                    link = lt = mi.author_link_map[aut]
                elif default_author_link:
                    if isdevice and default_author_link == 'search-calibre':
                        default_author_link = DEFAULT_AUTHOR_LINK
                    if default_author_link.startswith('search-'):
                        which_src = default_author_link.partition('-')[2]
                        link, lt = author_search_href(which_src,
                                                      title=mi.title,
                                                      author=aut)
                    else:
                        # default_author_link is a template evaluated with the
                        # author/title values
                        vals = {
                            'author': qquote(aut),
                            'title': qquote(mi.title)
                        }
                        try:
                            vals['author_sort'] = qquote(
                                mi.author_sort_map[aut])
                        except KeyError:
                            vals['author_sort'] = qquote(aut)
                        link = lt = formatter.safe_format(
                            default_author_link, vals, '', vals)
                aut = p(aut)
                if link:
                    authors.append(
                        '<a title="%s" href="%s">%s</a>' %
                        (a(lt), action('author', url=link, name=aut,
                                       title=lt), aut))
                else:
                    authors.append(aut)
            ans.append((field, row % (name, ' & '.join(authors))))
        elif field == 'languages':
            if not mi.languages:
                continue
            names = filter(None, map(calibre_langcode_to_name, mi.languages))
            names = [
                '<a href="%s" title="%s">%s</a>' %
                (search_action('languages', n),
                 _('Search calibre for books with the language: {}').format(n),
                 n) for n in names
            ]
            ans.append((field, row % (name, u', '.join(names))))
        elif field == 'publisher':
            if not mi.publisher:
                continue
            val = '<a href="%s" title="%s">%s</a>' % (search_action_with_data(
                'publisher', mi.publisher, book_id),
                _('Click to see books with {0}: {1}').format(
                    metadata['name'], a(mi.publisher)), p(mi.publisher))
            ans.append((field, row % (name, val)))
        elif field == 'title':
            # otherwise title gets metadata['datatype'] == 'text'
            # treatment below with a click to search link (which isn't
            # too bad), and a right-click 'Delete' option to delete
            # the title (which is bad).
            val = mi.format_field(field)[-1]
            ans.append((field, row % (name, val)))
        else:
            # Generic handling keyed off the field's datatype
            val = mi.format_field(field)[-1]
            if val is None:
                continue
            val = p(val)
            if metadata['datatype'] == 'series':
                sidx = mi.get(field + '_index')
                if sidx is None:
                    sidx = 1.0
                try:
                    st = metadata['search_terms'][0]
                except Exception:
                    st = field
                series = getattr(mi, field)
                val = _('%(sidx)s of <a href="%(href)s" title="%(tt)s">'
                        '<span class="%(cls)s">%(series)s</span></a>') % dict(
                            sidx=fmt_sidx(sidx, use_roman=use_roman_numbers),
                            cls="series_name",
                            series=p(series),
                            href=search_action_with_data(
                                st, series, book_id, field),
                            tt=p(_('Click to see books in this series')))
            elif metadata['datatype'] == 'datetime':
                aval = getattr(mi, field)
                if is_date_undefined(aval):
                    continue
            elif metadata['datatype'] == 'text' and metadata['is_multiple']:
                try:
                    st = metadata['search_terms'][0]
                except Exception:
                    st = field
                all_vals = mi.get(field)
                if not metadata.get('display', {}).get('is_names', False):
                    all_vals = sorted(all_vals, key=sort_key)
                links = [
                    '<a href="%s" title="%s">%s</a>' %
                    (search_action_with_data(st, x, book_id, field),
                     _('Click to see books with {0}: {1}').format(
                         metadata['name'], a(x)), p(x)) for x in all_vals
                ]
                val = metadata['is_multiple']['list_to_ui'].join(links)
            elif metadata['datatype'] == 'text' or metadata[
                    'datatype'] == 'enumeration':
                # text/is_multiple handled above so no need to add the test to the if
                try:
                    st = metadata['search_terms'][0]
                except Exception:
                    st = field
                val = '<a href="%s" title="%s">%s</a>' % (
                    search_action_with_data(st, val, book_id, field), a(
                        _('Click to see books with {0}: {1}').format(
                            metadata['name'], val)), p(val))
            ans.append((field, row % (name, val)))
    dc = getattr(mi, 'device_collections', [])
    if dc:
        dc = u', '.join(sorted(dc, key=sort_key))
        ans.append(('device_collections',
                    row % (_('Collections') + ':', dc)))

    def classname(field):
        # CSS class based on the field's datatype; falls back to 'text'
        try:
            dt = mi.metadata_for_field(field)['datatype']
        except:
            dt = 'text'
        return 'datatype_%s' % dt

    ans = [
        u'<tr id="%s" class="%s">%s</tr>' % (fieldl.replace(
            '#', '_'), classname(fieldl), html) for fieldl, html in ans
    ]
    # print '\n'.join(ans)
    direction = 'rtl' if rtl else 'ltr'
    margin = 'left' if rtl else 'right'
    return u'<style>table.fields td { vertical-align:top}</style>' + \
        u'<table class="fields" style="direction: %s; margin-%s:auto">%s</table>'%(
            direction, margin, u'\n'.join(ans)), comment_fields
def fb2_header(self):
    '''
    Build the FB2 <description> header XML from the OEB book's metadata.

    Collects title, author(s), language, cover, genre, keywords, series,
    identifiers, dates, publisher, ISBN and description into a dict and
    interpolates it into a FictionBook 2.0 header template.

    :return: the header XML as a single string with empty lines removed
    '''
    from calibre.ebooks.oeb.base import OPF
    metadata = {}
    metadata['title'] = self.oeb_book.metadata.title[0].value
    metadata['appname'] = __appname__
    metadata['version'] = __version__
    metadata['date'] = '%i.%i.%i' % (
        datetime.now().day, datetime.now().month, datetime.now().year)
    if self.oeb_book.metadata.language:
        # Prefer the 2-letter ISO 639-1 code; fall back to the raw value
        lc = lang_as_iso639_1(self.oeb_book.metadata.language[0].value)
        if not lc:
            lc = self.oeb_book.metadata.language[0].value
        metadata['lang'] = lc or 'en'
    else:
        metadata['lang'] = u'en'
    metadata['id'] = None
    metadata['cover'] = self.get_cover()
    metadata['genre'] = self.opts.fb2_genre
    metadata['author'] = ''
    for auth in self.oeb_book.metadata.creator:
        # Heuristic name split on spaces: first/middle/last
        author_first = ''
        author_middle = ''
        author_last = ''
        author_parts = auth.value.split(' ')
        if len(author_parts) == 1:
            author_last = author_parts[0]
        elif len(author_parts) == 2:
            author_first = author_parts[0]
            author_last = author_parts[1]
        else:
            author_first = author_parts[0]
            author_middle = ' '.join(author_parts[1:-1])
            author_last = author_parts[-1]
        metadata['author'] += '<author>'
        metadata[
            'author'] += '<first-name>%s</first-name>' % prepare_string_for_xml(
                author_first)
        if author_middle:
            metadata[
                'author'] += '<middle-name>%s</middle-name>' % prepare_string_for_xml(
                    author_middle)
        metadata[
            'author'] += '<last-name>%s</last-name>' % prepare_string_for_xml(
                author_last)
        metadata['author'] += '</author>'
    if not metadata['author']:
        # FB2 requires an author element, emit an empty one
        metadata[
            'author'] = '<author><first-name></first-name><last-name></last-name></author>'
    metadata['keywords'] = ''
    tags = list(map(unicode_type, self.oeb_book.metadata.subject))
    if tags:
        tags = ', '.join(prepare_string_for_xml(x) for x in tags)
        metadata['keywords'] = '<keywords>%s</keywords>' % tags
    metadata['sequence'] = ''
    if self.oeb_book.metadata.series:
        index = '1'
        if self.oeb_book.metadata.series_index:
            index = self.oeb_book.metadata.series_index[0]
        metadata['sequence'] = '<sequence name="%s" number="%s"/>' % (
            prepare_string_for_xml(
                '%s' % self.oeb_book.metadata.series[0]), index)
    year = publisher = isbn = ''
    identifiers = self.oeb_book.metadata['identifier']
    # Look for a UUID identifier to use as the book id
    for x in identifiers:
        if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode_type(
                x).startswith('urn:uuid:'):
            metadata['id'] = unicode_type(x).split(':')[-1]
            break
    if metadata['id'] is None:
        self.log.warn('No UUID identifier found')
        metadata['id'] = unicode_type(uuid.uuid4())
    try:
        date = self.oeb_book.metadata['date'][0]
    except IndexError:
        pass
    else:
        # Only the year part of an ISO date
        year = '<year>%s</year>' % prepare_string_for_xml(
            date.value.partition('-')[0])
    try:
        publisher = self.oeb_book.metadata['publisher'][0]
    except IndexError:
        pass
    else:
        publisher = '<publisher>%s</publisher>' % prepare_string_for_xml(
            publisher.value)
    for x in identifiers:
        if x.get(OPF('scheme'), None).lower() == 'isbn':
            isbn = '<isbn>%s</isbn>' % prepare_string_for_xml(x.value)
    metadata['year'], metadata['isbn'], metadata[
        'publisher'] = year, isbn, publisher
    # Escape everything that was not already wrapped in XML markup above
    for key, value in metadata.items():
        if key not in ('author', 'cover', 'sequence', 'keywords', 'year',
                       'publisher', 'isbn'):
            metadata[key] = prepare_string_for_xml(value)
    try:
        comments = self.oeb_book.metadata['description'][0]
    except Exception:
        metadata['comments'] = ''
    else:
        from calibre.utils.html2text import html2text
        metadata['comments'] = '<annotation><p>{}</p></annotation>'.format(
            prepare_string_for_xml(html2text(comments.value).strip()))

    # Keep the indentation level of the description the same as the body.
    header = textwrap.dedent('''\
        <FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">
        <description>
            <title-info>
                <genre>%(genre)s</genre>
                %(author)s
                <book-title>%(title)s</book-title>
                %(cover)s
                <lang>%(lang)s</lang>
                %(keywords)s
                %(sequence)s
                %(comments)s
            </title-info>
            <document-info>
                %(author)s
                <program-used>%(appname)s %(version)s</program-used>
                <date>%(date)s</date>
                <id>%(id)s</id>
                <version>1.0</version>
            </document-info>
            <publish-info>
                %(publisher)s
                %(year)s
                %(isbn)s
            </publish-info>
        </description>''') % metadata

    # Remove empty lines.
    return '\n'.join(filter(unicode_type.strip, header.splitlines()))
def run_file_dialog(
    parent=None, title=None, initial_folder=None, filename=None, save_path=None,
    allow_multiple=False, only_dirs=False, confirm_overwrite=True, save_as=False,
    no_symlinks=False, file_types=(), default_ext=None, app_uid=None
):
    '''
    Show a native Windows file dialog by launching a helper process and
    exchanging data with it over a named pipe.

    The dialog's configuration is serialized into a list of records sent to
    the helper on stdin; results come back over the pipe, prefixed with a
    random secret to authenticate the sender.

    :return: a tuple of selected path(s), empty on cancel/no selection
    :raises Exception: if the helper fails, the pipe read times out, or the
        secret does not match
    '''
    from calibre.gui2 import sanitize_env_vars
    # Random secret authenticates data coming back over the pipe; NUL is
    # replaced because it is used as the record separator
    secret = os.urandom(32).replace(b'\0', b' ')
    pipename = '\\\\.\\pipe\\%s' % uuid4()
    data = [serialize_string('PIPENAME', pipename), serialize_secret(secret)]
    parent = parent or None
    if parent is not None:
        data.append(serialize_hwnd(get_hwnd(parent)))
    if title:
        data.append(serialize_string('TITLE', title))
    if no_symlinks:
        data.append(serialize_binary('NO_SYMLINKS', no_symlinks))
    if save_as:
        data.append(serialize_binary('SAVE_AS', save_as))
        if confirm_overwrite:
            data.append(serialize_binary('CONFIRM_OVERWRITE', confirm_overwrite))
        if save_path is not None:
            save_path = process_path(save_path)
            if os.path.exists(save_path):
                data.append(serialize_string('SAVE_PATH', save_path))
            else:
                # Fall back to the containing folder/basename when the full
                # save path does not exist yet
                if not initial_folder:
                    initial_folder = select_initial_dir(save_path)
                if not filename:
                    filename = os.path.basename(save_path)
    else:
        if allow_multiple:
            data.append(serialize_binary('MULTISELECT', allow_multiple))
        if only_dirs:
            data.append(serialize_binary('ONLY_DIRS', only_dirs))
    if initial_folder is not None:
        initial_folder = process_path(initial_folder)
        if os.path.isdir(initial_folder):
            data.append(serialize_string('FOLDER', initial_folder))
    if filename:
        if isinstance(filename, bytes):
            filename = os.fsdecode(filename)
        data.append(serialize_string('FILENAME', filename))
    if only_dirs:
        file_types = ()  # file types not allowed for dir only dialogs
    elif not file_types:
        file_types = [(_('All files'), ('*',))]
    if file_types:
        data.append(serialize_file_types(file_types))
    if default_ext:
        data.append(serialize_string('DEFAULT_EXTENSION', default_ext))
    app_uid = app_uid or current_app_uid
    if app_uid:
        data.append(serialize_string('APP_UID', app_uid))
    from PyQt5.Qt import QEventLoop, Qt, pyqtSignal

    class Loop(QEventLoop):
        # Event loop that exits when the helper signals the dialog closed;
        # queued connection so exit happens on the loop's own thread
        dialog_closed = pyqtSignal()

        def __init__(self):
            QEventLoop.__init__(self)
            self.dialog_closed.connect(
                self.exit, type=Qt.ConnectionType.QueuedConnection)

    loop = Loop()
    server = PipeServer(pipename)
    server.start()
    with sanitize_env_vars():
        h = Helper(subprocess.Popen(
            [HELPER], stdout=subprocess.PIPE, stdin=subprocess.PIPE,
            stderr=subprocess.PIPE), data, loop.dialog_closed.emit)
        h.start()
        # Spin the Qt event loop (without user input) until the helper exits
        loop.exec_(QEventLoop.ProcessEventsFlag.ExcludeUserInputEvents)

    def decode(x):
        # Best-effort decode of helper output for error messages
        x = x or b''
        try:
            x = x.decode('utf-8')
        except Exception:
            x = repr(x)
        return x

    def get_errors():
        return decode(h.stdoutdata) + ' ' + decode(h.stderrdata)
    from calibre import prints
    from calibre.constants import DEBUG
    if DEBUG:
        prints('stdout+stderr from file dialog helper:',
               unicode_type([h.stdoutdata, h.stderrdata]))
    if h.rc != 0:
        raise Exception('File dialog failed (return code %s): %s' % (
            h.rc, get_errors()))
    server.join(2)
    if server.is_alive():
        raise Exception('Timed out waiting for read from pipe to complete')
    if server.err_msg:
        raise Exception(server.err_msg)
    if not server.data:
        return ()
    # Records are NUL separated; first record must be the secret
    parts = list(filter(None, server.data.split(b'\0')))
    if DEBUG:
        prints('piped data from file dialog helper:', unicode_type(parts))
    if len(parts) < 2:
        return ()
    if parts[0] != secret:
        raise Exception(
            'File dialog failed, incorrect secret received: ' + get_errors())
    from calibre_extensions.winutil import get_long_path_name

    def fix_path(x):
        # Expand 8.3 short names to long paths; for a not-yet-existing file
        # expand only the parent directory
        u = os.path.abspath(x.decode('utf-8'))
        with suppress(Exception):
            try:
                return get_long_path_name(u)
            except FileNotFoundError:
                base, fn = os.path.split(u)
                return os.path.join(get_long_path_name(base), fn)
        return u

    ans = tuple(map(fix_path, parts[1:]))
    return ans
def get_words(text, lang): try: ans = split_into_words(unicode_type(text), lang) except (TypeError, ValueError): return () return list(filter(filter_words, ans))
def get_name_components(name): components = list(filter(None, [t.strip() for t in name.split('.')])) if not components or '.'.join(components) != name: components = [name] return components
def format_field_extended(self, key, series_with_index=True):
    # Returns (display_name, formatted_value, original_value, field_metadata)
    # for standard fields, custom columns, custom series indices and
    # top-level identifiers; (None, None, None, None) if key is unknown.
    from calibre.ebooks.metadata import authors_to_string
    '''
    returns the tuple (display_name, formatted_value, original_value,
    field_metadata)
    '''
    from calibre.utils.date import format_date

    # Handle custom series index
    if key.startswith('#') and key.endswith('_index'):
        tkey = key[:-6]  # strip the _index
        cmeta = self.get_user_metadata(tkey, make_copy=False)
        if cmeta and cmeta['datatype'] == 'series':
            if self.get(tkey):
                res = self.get_extra(tkey)
                return (unicode_type(cmeta['name']+'_index'),
                        self.format_series_index(res), res, cmeta)
            else:
                return (unicode_type(cmeta['name']+'_index'), '', '', cmeta)

    # Custom columns
    if key in self.custom_field_keys():
        res = self.get(key, None)       # get evaluates all necessary composites
        cmeta = self.get_user_metadata(key, make_copy=False)
        name = unicode_type(cmeta['name'])
        if res is None or res == '':    # can't check "not res" because of numeric fields
            return (name, res, None, None)
        orig_res = res
        datatype = cmeta['datatype']
        if datatype == 'text' and cmeta['is_multiple']:
            res = cmeta['is_multiple']['list_to_ui'].join(res)
        elif datatype == 'series' and series_with_index:
            if self.get_extra(key) is not None:
                res = res + \
                    ' [%s]'%self.format_series_index(val=self.get_extra(key))
        elif datatype == 'datetime':
            res = format_date(res, cmeta['display'].get('date_format','dd MMM yyyy'))
        elif datatype == 'bool':
            res = _('Yes') if res else _('No')
        elif datatype == 'rating':
            # Ratings are stored as 0-10, displayed as 0-5
            res = '%.2g'%(res/2)
        elif datatype in ['int', 'float']:
            # Optional user-supplied number format; silently ignored if it fails
            try:
                fmt = cmeta['display'].get('number_format', None)
                res = fmt.format(res)
            except:
                pass
        return (name, unicode_type(res), orig_res, cmeta)

    # convert top-level ids into their value
    if key in TOP_LEVEL_IDENTIFIERS:
        fmeta = field_metadata['identifiers']
        name = key
        res = self.get(key, None)
        return (name, res, res, fmeta)

    # Translate aliases into the standard field name
    fmkey = field_metadata.search_term_to_field_key(key)
    if fmkey in field_metadata and field_metadata[fmkey]['kind'] == 'field':
        res = self.get(key, None)
        fmeta = field_metadata[fmkey]
        name = unicode_type(fmeta['name'])
        if res is None or res == '':
            return (name, res, None, None)
        orig_res = res
        name = unicode_type(fmeta['name'])
        datatype = fmeta['datatype']
        if key == 'authors':
            res = authors_to_string(res)
        elif key == 'series_index':
            res = self.format_series_index(res)
        elif datatype == 'text' and fmeta['is_multiple']:
            if isinstance(res, dict):
                # e.g. identifiers: render as key:value pairs
                res = [k + ':' + v for k,v in res.items()]
            res = fmeta['is_multiple']['list_to_ui'].join(
                sorted(filter(None, res), key=sort_key))
        elif datatype == 'series' and series_with_index:
            res = res + ' [%s]'%self.format_series_index()
        elif datatype == 'datetime':
            res = format_date(res, fmeta['display'].get('date_format','dd MMM yyyy'))
        elif datatype == 'rating':
            res = '%.2g'%(res/2)
        elif key == 'size':
            res = human_readable(res)
        return (name, unicode_type(res), orig_res, fmeta)
    return (None, None, None, None)
def add_hrefs(self, data): # kindlegen inserts a trailing | after the last href self.hrefs = list(filter(None, data.decode('utf-8').split('|')))
def ACQUISITION_ENTRY(book_id, updated, request_context):
    '''
    Build an OPDS acquisition feed <entry> element for one book.

    Includes title, author, id, timestamps, an xhtml content blob with
    rating/tags/series/other displayable fields and comments, acquisition
    links for every available format, and cover/thumbnail links.

    NOTE(review): the ``updated`` parameter is not referenced in this body —
    entry timestamps come from ``mi``; confirm against callers.
    '''
    field_metadata = request_context.db.field_metadata
    mi = request_context.db.get_metadata(book_id)
    extra = []
    if (mi.rating or 0) > 0:
        rating = rating_to_stars(mi.rating)
        extra.append(_('RATING: %s<br />') % rating)
    if mi.tags:
        extra.append(
            _('TAGS: %s<br />') % xml(format_tag_string(mi.tags, None)))
    if mi.series:
        extra.append(
            _('SERIES: %(series)s [%(sidx)s]<br />') %
            dict(series=xml(mi.series),
                 sidx=fmt_sidx(float(mi.series_index))))
    # Render any remaining displayable fields according to their datatype
    for key in filter(request_context.ctx.is_field_displayable,
                      field_metadata.ignorable_field_keys()):
        name, val = mi.format_field(key)
        if val:
            fm = field_metadata[key]
            datatype = fm['datatype']
            if datatype == 'text' and fm['is_multiple']:
                extra.append(
                    '%s: %s<br />' %
                    (xml(name),
                     xml(format_tag_string(
                         val, fm['is_multiple']['ui_to_list'],
                         joinval=fm['is_multiple']['list_to_ui']))))
            elif datatype == 'comments' or (
                    fm['datatype'] == 'composite'
                    and fm['display'].get('contains_html', False)):
                extra.append('%s: %s<br />' %
                             (xml(name), comments_to_html(unicode_type(val))))
            else:
                extra.append('%s: %s<br />' %
                             (xml(name), xml(unicode_type(val))))
    if mi.comments:
        comments = comments_to_html(mi.comments)
        extra.append(comments)
    if extra:
        extra = html_to_lxml('\n'.join(extra))
    ans = E.entry(TITLE(mi.title),
                  E.author(E.name(authors_to_string(mi.authors))),
                  ID('urn:uuid:' + mi.uuid), UPDATED(mi.last_modified),
                  E.published(mi.timestamp.isoformat()))
    if mi.pubdate and not is_date_undefined(mi.pubdate):
        ans.append(ans.makeelement('{%s}date' % DC_NS))
        ans[-1].text = mi.pubdate.isoformat()
    if len(extra):
        ans.append(E.content(extra, type='xhtml'))
    get = partial(request_context.ctx.url_for, '/get', book_id=book_id,
                  library_id=request_context.library_id)
    if mi.formats:
        fm = mi.format_metadata
        for fmt in mi.formats:
            fmt = fmt.lower()
            # Infer the MIME type from a dummy filename with this extension
            mt = guess_type('a.' + fmt)[0]
            if mt:
                link = E.link(type=mt, href=get(what=fmt),
                              rel="http://opds-spec.org/acquisition")
                ffm = fm.get(fmt.upper())
                if ffm:
                    link.set('length', unicode_type(ffm['size']))
                    link.set('mtime', ffm['mtime'].isoformat())
                ans.append(link)
    ans.append(
        E.link(type='image/jpeg', href=get(what='cover'),
               rel="http://opds-spec.org/cover"))
    ans.append(
        E.link(type='image/jpeg', href=get(what='thumb'),
               rel="http://opds-spec.org/thumbnail"))
    ans.append(
        E.link(type='image/jpeg', href=get(what='cover'),
               rel="http://opds-spec.org/image"))
    ans.append(
        E.link(type='image/jpeg', href=get(what='thumb'),
               rel="http://opds-spec.org/image/thumbnail"))
    return ans
def epubify_markup(self, root, log): from calibre.ebooks.oeb.base import XPath, XHTML # Fix empty title tags for t in XPath('//h:title')(root): if not t.text: t.text = u' ' # Fix <p><div> constructs as the asinine epubchecker complains # about them pdiv = XPath('//h:p/h:div') for div in pdiv(root): div.getparent().tag = XHTML('div') # Remove the position:relative as it causes problems with some epub # renderers. Remove display: block on an image inside a div as it is # redundant and prevents text-align:center from working in ADE # Also ensure that the img is contained in its containing div imgpath = XPath('//h:div/h:img[@style]') for img in imgpath(root): div = img.getparent() if len(div) == 1: style = div.attrib.get('style', '') if style and not style.endswith(';'): style = style + ';' style += 'position:static' # Ensures position of containing div is static # Ensure that the img is always contained in its frame div.attrib['style'] = style img.attrib['style'] = 'max-width: 100%; max-height: 100%' # Handle anchored images. The default markup + CSS produced by # odf2xhtml works with WebKit but not with ADE. So we convert the # common cases of left/right/center aligned block images to work on # both webkit and ADE. We detect the case of setting the side margins # to auto and map it to an appropriate text-align directive, which # works in both WebKit and ADE. 
# https://bugs.launchpad.net/bugs/1063207 # https://bugs.launchpad.net/calibre/+bug/859343 imgpath = XPath('descendant::h:div/h:div/h:img') for img in imgpath(root): div2 = img.getparent() div1 = div2.getparent() if (len(div1), len(div2)) != (1, 1): continue cls = div1.get('class', '') first_rules = list( filter(None, [self.get_css_for_class(x) for x in cls.split()])) has_align = False for r in first_rules: if r.style.getProperty(u'text-align') is not None: has_align = True ml = mr = None if not has_align: aval = None cls = div2.get(u'class', u'') rules = list( filter(None, [self.get_css_for_class(x) for x in cls.split()])) for r in rules: ml = r.style.getPropertyCSSValue(u'margin-left') or ml mr = r.style.getPropertyCSSValue(u'margin-right') or mr ml = getattr(ml, 'value', None) mr = getattr(mr, 'value', None) if ml == mr == u'auto': aval = u'center' elif ml == u'auto' and mr != u'auto': aval = 'right' elif ml != u'auto' and mr == u'auto': aval = 'left' if aval is not None: style = div1.attrib.get('style', '').strip() if style and not style.endswith(';'): style = style + ';' style += 'text-align:%s' % aval has_align = True div1.attrib['style'] = style if has_align: # This is needed for ADE, without it the text-align has no # effect style = div2.attrib['style'] div2.attrib['style'] = 'display:inline;' + style
def finalize(self, all_blocks):
    '''
    Aggregate the per-block/per-run styles gathered during conversion into
    the final named style sets: pure block styles, combined (block+run)
    styles with heading outline levels, and descendant text styles.

    Populates self.pure_block_styles, self.normal_pure_block_style,
    self.combined_styles, self.normal_style, self.descendant_text_styles
    and self.primary_heading_style.
    '''
    block_counts, run_counts = Counter(), Counter()
    block_rmap, run_rmap = defaultdict(list), defaultdict(list)
    used_pairs = defaultdict(list)
    heading_styles = defaultdict(list)
    headings = frozenset('h1 h2 h3 h4 h5 h6'.split())
    pure_block_styles = set()

    for block in all_blocks:
        bs = block.style
        block_counts[bs] += 1
        block_rmap[block.style].append(block)
        local_run_counts = Counter()
        for run in block.runs:
            count = run.style_weight
            run_counts[run.style] += count
            local_run_counts[run.style] += count
            run_rmap[run.style].append(run)
        if local_run_counts:
            # The dominant run style of the block pairs with its block style
            rs = local_run_counts.most_common(1)[0][0]
            used_pairs[(bs, rs)].append(block)
            if block.html_tag in headings:
                heading_styles[block.html_tag].append((bs, rs))
        else:
            # Block with no runs: style applies to the block alone
            pure_block_styles.add(bs)

    self.pure_block_styles = sorted(pure_block_styles,
                                    key=block_counts.__getitem__)
    # Width of zero-padded numbering for the style names
    bnum = len(unicode_type(max(1, len(pure_block_styles) - 1)))
    for i, bs in enumerate(self.pure_block_styles):
        bs.id = bs.name = '%0{}d Block'.format(bnum) % i
        bs.seq = i
        if i == 0:
            self.normal_pure_block_style = bs

    counts = Counter()
    smap = {}
    for (bs, rs), blocks in iteritems(used_pairs):
        s = CombinedStyle(bs, rs, blocks, self.namespace)
        smap[(bs, rs)] = s
        counts[s] += sum(1 for b in blocks if not b.is_empty())
    # Assign outline levels: for each heading tag the most used, not yet
    # assigned, combined style becomes that level's heading style
    for i, heading_tag in enumerate(sorted(heading_styles)):
        styles = sorted((smap[k] for k in heading_styles[heading_tag]),
                        key=counts.__getitem__)
        styles = list(filter(lambda s: s.outline_level is None, styles))
        if styles:
            heading_style = styles[-1]
            heading_style.outline_level = i

    snum = len(unicode_type(max(1, len(counts) - 1)))
    heading_styles = []
    for i, (style, count) in enumerate(counts.most_common()):
        if i == 0:
            # Most common combined style becomes the document's Normal style
            self.normal_style = style
            style.id = style.name = 'Normal'
        else:
            if style.outline_level is None:
                val = 'Para %0{}d'.format(snum) % i
            else:
                val = 'Heading %d' % (style.outline_level + 1)
                heading_styles.append(style)
            style.id = style.name = val
        style.seq = i
    self.combined_styles = sorted(counts, key=attrgetter('seq'))
    [ls.apply() for ls in self.combined_styles]

    # Styles for runs whose formatting differs from their parent block
    descendant_style_map = {}
    ds_counts = Counter()
    for block in all_blocks:
        for run in block.runs:
            if run.parent_style is not run.style and run.parent_style and run.style:
                ds = DescendantTextStyle(run.parent_style, run.style)
                if ds.properties:
                    # Deduplicate equal descendant styles via the map
                    run.descendant_style = descendant_style_map.get(ds)
                    if run.descendant_style is None:
                        run.descendant_style = descendant_style_map[ds] = ds
                    ds_counts[run.descendant_style] += run.style_weight
    rnum = len(unicode_type(max(1, len(ds_counts) - 1)))
    for i, (text_style, count) in enumerate(ds_counts.most_common()):
        text_style.id = 'Text%d' % i
        text_style.name = '%0{}d Text'.format(rnum) % i
        text_style.seq = i
    self.descendant_text_styles = sorted(descendant_style_map,
                                         key=attrgetter('seq'))

    self.log.debug('%d Text Styles %d Combined styles' % tuple(
        map(len, (self.descendant_text_styles, self.combined_styles))))

    self.primary_heading_style = None
    if heading_styles:
        heading_styles.sort(key=attrgetter('outline_level'))
        self.primary_heading_style = heading_styles[0]
    else:
        # No headings: fall back to the combined style with the largest font
        ms = 0
        for s in self.combined_styles:
            if s.rs.font_size > ms:
                self.primary_heading_style = s
                ms = s.rs.font_size
def read_metadata_kfx(stream, read_cover=True): ' Read the metadata.kfx file that is found in the sdr book folder for KFX files ' c = Container(stream.read()) m = extract_metadata(c.decode()) # dump_metadata(m) def has(x): return m[x] and m[x][0] def get(x, single=True): ans = m[x] if single: ans = clean_xml_chars(ans[0]) if ans else '' else: ans = [clean_xml_chars(y) for y in ans] return ans title = get('title') or _('Unknown') authors = get('author', False) or [_('Unknown')] auth_pat = re.compile(r'([^,]+?)\s*,\s+([^,]+)$') def fix_author(x): if tweaks['author_sort_copy_method'] != 'copy': m = auth_pat.match(x.strip()) if m is not None: return m.group(2) + ' ' + m.group(1) return x unique_authors = [] # remove duplicates while retaining order for f in [fix_author(x) for x in authors]: if f not in unique_authors: unique_authors.append(f) mi = Metadata(title, unique_authors) if has('author'): mi.author_sort = get('author') if has('ASIN'): mi.set_identifier('mobi-asin', get('ASIN')) elif has('content_id'): mi.set_identifier('mobi-asin', get('content_id')) if has('languages'): langs = list( filter(None, (canonicalize_lang(x) for x in get('languages', False)))) if langs: mi.languages = langs if has('issue_date'): try: mi.pubdate = parse_only_date(get('issue_date')) except Exception: pass if has('publisher') and get('publisher') != 'Unknown': mi.publisher = get('publisher') if read_cover and m[COVER_KEY]: try: data = from_base64_bytes(m[COVER_KEY]) fmt, w, h = identify(data) except Exception: w, h, fmt = 0, 0, None if fmt and w > -1 and h > -1: mi.cover_data = (fmt, data) return mi