def build_selector(text, case_sensitive=True):
    """Compile the CSS selector ``text`` into an ``etree.XPath`` object.

    The selector is translated with ``css_to_xpath`` when ``case_sensitive``
    is true and with ``ci_css_to_xpath`` otherwise, passed through
    ``fix_namespace`` and compiled against the ``XPNSMAP`` namespace map.

    Returns the compiled XPath, or ``None`` if translating or compiling the
    selector raises any ``Exception``.
    """
    if case_sensitive:
        translate = css_to_xpath
    else:
        translate = ci_css_to_xpath
    try:
        expression = fix_namespace(translate(text))
        compiled = etree.XPath(expression, namespaces=XPNSMAP)
    except Exception:
        # Any failure to translate/compile is reported as "no selector"
        return None
    return compiled
def find_page_breaks(self, item):
    """Locate the elements of ``item`` at which CSS requests a page break.

    On the first call (``self.page_break_selectors`` is ``None``) the
    stylesheets in ``self.oeb.manifest`` are scanned for rules whose
    ``page-break-before``/``page-break-after`` value is anything other than
    ``avoid``, ``auto`` or ``inherit``. Each such rule's selector is compiled
    to an XPath and cached in ``self.page_break_selectors`` as an
    ``(XPath, before)`` tuple, where ``before`` is True for
    ``page-break-before`` and False for ``page-break-after``. When
    ``self.remove_css_pagebreaks`` is set, the property is also removed from
    the rule.

    :param item: object whose ``data`` attribute is the parsed document tree
        (it must support ``xpath()`` and ``iter()``).
    :return: ``(page_breaks, page_break_ids)``: a list of ``(XPath, before)``
        tuples sorted in document order, each XPath selecting one break
        element by its id, and the parallel list of those ids. Matched
        elements lacking an id are given one of the form ``calibre_pb_N``.
    """
    if self.page_break_selectors is None:
        from calibre.ebooks.oeb.stylizer import fix_namespace
        css_to_xpath = HTMLTranslator().css_to_xpath
        self.page_break_selectors = set()
        stylesheets = [
            x.data for x in self.oeb.manifest if x.media_type in OEB_STYLES]
        ignored_values = {'avoid', 'auto', 'inherit'}
        properties = (('page-break-before', True),
                      ('page-break-after', False))
        for rule in rules(stylesheets):
            for prop, is_before in properties:
                value = getattr(
                    rule.style.getPropertyCSSValue(prop), 'cssText',
                    '').strip().lower()
                if not value or value in ignored_values:
                    continue
                try:
                    self.page_break_selectors.add((XPath(
                        fix_namespace(css_to_xpath(rule.selectorText))),
                        is_before))
                    if self.remove_css_pagebreaks:
                        rule.style.removeProperty(prop)
                except Exception:
                    # Best effort: a selector that cannot be translated or
                    # compiled is skipped. Only Exception is caught so that
                    # KeyboardInterrupt/SystemExit still propagate.
                    pass

    page_breaks = set()
    # The body lookup does not depend on the selector, so it is done once,
    # and not at all when there are no selectors to apply.
    body = (item.data.xpath('//h:body', namespaces=NAMESPACES)
            if self.page_break_selectors else [])
    if body:
        for selector, before in self.page_break_selectors:
            for elem in selector(body[0]):
                # Ignore matches that are themselves a //h:body result
                if elem not in body:
                    elem.set('pb_before', '1' if before else '0')
                    page_breaks.add(elem)

    # Temporarily number every element so that the matched elements can be
    # sorted into document order; the attribute is stripped again below.
    for i, elem in enumerate(item.data.iter()):
        try:
            elem.set('pb_order', str(i))
        except TypeError:
            # Can't set attributes on comment nodes etc.
            continue

    page_breaks = sorted(page_breaks, key=lambda x: int(x.get('pb_order')))
    page_break_ids, page_breaks_ = [], []
    for i, x in enumerate(page_breaks):
        x.set('id', x.get('id', 'calibre_pb_%d' % i))
        elem_id = x.get('id')
        # Try quoting the id with double quotes first, then with apostrophes
        for template in ('//*[@id="%s"]', "//*[@id='%s']"):
            try:
                xp = XPath(template % elem_id)
            except Exception:
                continue
            break
        else:
            # The id has both a quote and an apostrophe or some other
            # problem. Just replace it, since it is unlikely to work
            # anywhere else either.
            elem_id = 'calibre_pb_%d' % i
            x.set('id', elem_id)
            xp = XPath('//*[@id=%r]' % elem_id)
        page_breaks_.append((xp, x.get('pb_before', '0') == '1'))
        page_break_ids.append(elem_id)

    # Remove the temporary bookkeeping attributes from the whole tree
    for elem in item.data.iter():
        elem.attrib.pop('pb_order', False)
        elem.attrib.pop('pb_before', False)

    return page_breaks_, page_break_ids
def find_page_breaks(self, item):
    """Return the page-break targets that the book's CSS defines for ``item``.

    The selector cache ``self.page_break_selectors`` is built lazily: while it
    is ``None``, every rule of the stylesheets in ``self.oeb.manifest`` is
    inspected, and rules whose ``page-break-before`` or ``page-break-after``
    value is set to something other than ``avoid``/``auto``/``inherit``
    contribute an ``(XPath, before)`` entry (``before`` is True for
    ``page-break-before``, False for ``page-break-after``). If
    ``self.remove_css_pagebreaks`` is true the property is removed from the
    rule once its selector has been recorded.

    :param item: object whose ``data`` attribute is the parsed document tree
        (used via ``xpath()`` and ``iter()``).
    :return: a 2-tuple ``(page_breaks, page_break_ids)``. ``page_breaks`` is a
        list of ``(XPath, before)`` pairs in document order whose XPath
        selects a single break element by id; ``page_break_ids`` lists the
        corresponding ids. Elements without an id receive ``calibre_pb_N``.
    """
    if self.page_break_selectors is None:
        from calibre.ebooks.oeb.stylizer import fix_namespace

        css_to_xpath = HTMLTranslator().css_to_xpath
        self.page_break_selectors = set()
        stylesheets = [x.data for x in self.oeb.manifest if x.media_type in OEB_STYLES]
        no_break = {"avoid", "auto", "inherit"}
        for rule in rules(stylesheets):
            before = getattr(rule.style.getPropertyCSSValue("page-break-before"), "cssText", "").strip().lower()
            after = getattr(rule.style.getPropertyCSSValue("page-break-after"), "cssText", "").strip().lower()
            # Selectors that cannot be converted/compiled are skipped on
            # purpose. Catch Exception rather than everything so that
            # KeyboardInterrupt and SystemExit are not swallowed.
            try:
                if before and before not in no_break:
                    self.page_break_selectors.add((XPath(fix_namespace(css_to_xpath(rule.selectorText))), True))
                    if self.remove_css_pagebreaks:
                        rule.style.removeProperty("page-break-before")
            except Exception:
                pass
            try:
                if after and after not in no_break:
                    self.page_break_selectors.add((XPath(fix_namespace(css_to_xpath(rule.selectorText))), False))
                    if self.remove_css_pagebreaks:
                        rule.style.removeProperty("page-break-after")
            except Exception:
                pass

    page_breaks = set()
    if self.page_break_selectors:
        # Look the body up once instead of once per selector
        body = item.data.xpath("//h:body", namespaces=NAMESPACES)
        if body:
            root = body[0]
            for selector, before in self.page_break_selectors:
                for elem in selector(root):
                    if elem in body:
                        # The match is one of the //h:body results; skip it
                        continue
                    elem.set("pb_before", "1" if before else "0")
                    page_breaks.add(elem)

    # Tag each element with its position in the tree; used only to sort the
    # matches into document order and removed again before returning.
    for i, elem in enumerate(item.data.iter()):
        try:
            elem.set("pb_order", str(i))
        except TypeError:
            # Can't set attributes on comment nodes etc.
            continue

    page_breaks = list(page_breaks)
    page_breaks.sort(key=lambda x: int(x.get("pb_order")))
    page_break_ids, page_breaks_ = [], []
    for i, x in enumerate(page_breaks):
        x.set("id", x.get("id", "calibre_pb_%d" % i))
        elem_id = x.get("id")
        try:
            xp = XPath('//*[@id="%s"]' % elem_id)
        except Exception:
            try:
                xp = XPath("//*[@id='%s']" % elem_id)
            except Exception:
                # The id has both a quote and an apostrophe or some other
                # problem. Just replace it, since it is unlikely to work
                # anywhere else either.
                elem_id = "calibre_pb_%d" % i
                x.set("id", elem_id)
                xp = XPath("//*[@id=%r]" % elem_id)
        page_breaks_.append((xp, x.get("pb_before", "0") == "1"))
        page_break_ids.append(elem_id)

    # Strip the temporary attributes from every node
    for elem in item.data.iter():
        elem.attrib.pop("pb_order", False)
        elem.attrib.pop("pb_before", False)

    return page_breaks_, page_break_ids