def find_page_breaks(self, item):
    """Find the elements in ``item`` at which CSS requests a page break.

    On the first call (while ``self.page_break_selectors`` is ``None``) every
    stylesheet in ``self.oeb.manifest`` is scanned and one
    ``(selector text, before)`` pair is cached on ``self`` for each rule
    whose ``page-break-before`` / ``page-break-after`` value is anything
    other than ``avoid``, ``auto`` or ``inherit``.  When
    ``self.remove_css_pagebreaks`` is true the property is also removed from
    the rule.

    Only elements below ``<body>`` are considered, and ``html``, ``body``,
    ``head``, ``style``, ``script``, ``meta`` and ``link`` elements are never
    treated as break points.  A matched element without an ``id`` is given
    one of the form ``calibre_pb_<n>``.  The temporary ``pb_order`` and
    ``pb_before`` attributes used for bookkeeping are stripped again before
    returning.

    :param item: object whose ``data`` attribute is the parsed document tree.
    :return: ``(page_breaks, page_break_ids)``.  ``page_breaks`` is a list of
        ``(xpath, before)`` pairs ordered by the elements' position in the
        tree, where ``xpath`` selects the element by id and ``before`` is
        True for a break before the element.  ``page_break_ids`` is the
        parallel list of ids.  Both lists are empty when there are no
        selectors or no ``<body>``.
    """
    if self.page_break_selectors is None:
        # First call: harvest the selectors once and cache them on self.
        self.page_break_selectors = set()
        ignored_values = {'avoid', 'auto', 'inherit'}
        stylesheets = [x.data for x in self.oeb.manifest
                       if x.media_type in OEB_STYLES]
        for rule in rules(stylesheets):
            # Both values are read before either property can be removed.
            before = force_unicode(getattr(
                rule.style.getPropertyCSSValue('page-break-before'),
                'cssText', '').strip().lower())
            after = force_unicode(getattr(
                rule.style.getPropertyCSSValue('page-break-after'),
                'cssText', '').strip().lower())
            for prop, value, is_before in (
                    ('page-break-before', before, True),
                    ('page-break-after', after, False)):
                try:
                    if value and value not in ignored_values:
                        self.page_break_selectors.add(
                            (rule.selectorText, is_before))
                        if self.remove_css_pagebreaks:
                            rule.style.removeProperty(prop)
                except Exception:
                    # Deliberately best effort: a rule that cannot be
                    # processed is skipped instead of aborting the scan.
                    pass

    if not self.page_break_selectors:
        # Nothing to look for, so do not touch the document at all.
        return [], []
    body = item.data.xpath('//h:body', namespaces=NAMESPACES)
    if not body:
        return [], []

    # The selector engine is only built once it is known to be needed.
    select = Select(item.data)
    descendants = frozenset(body[0].iterdescendants('*'))
    skipped_tags = {'html', 'body', 'head', 'style', 'script', 'meta', 'link'}

    page_breaks = set()
    for selector, before in self.page_break_selectors:
        try:
            for elem in select(selector):
                if (elem in descendants and
                        elem.tag.rpartition('}')[2].lower()
                        not in skipped_tags):
                    elem.set('pb_before', '1' if before else '0')
                    page_breaks.add(elem)
        except SelectorError as err:
            self.log.warn('Ignoring page breaks specified with invalid CSS selector: %r (%s)' % (selector, as_unicode(err)))

    # Number every element so the unordered set can be sorted by position.
    for i, elem in enumerate(item.data.iter('*')):
        try:
            elem.set('pb_order', str(i))
        except TypeError:  # Cannot set attributes on comment nodes etc.
            continue

    ordered = sorted(page_breaks, key=lambda x: int(x.get('pb_order')))
    page_break_ids, page_breaks_ = [], []
    for i, elem in enumerate(ordered):
        elem_id = elem.get('id', 'calibre_pb_%d' % i)
        elem.set('id', elem_id)
        try:
            xp = XPath('//*[@id="%s"]' % elem_id)
        except Exception:
            try:
                xp = XPath("//*[@id='%s']" % elem_id)
            except Exception:
                # The id contains both a quote and an apostrophe (or is
                # otherwise unusable in an XPath literal).  Replace it, since
                # it is unlikely to work anywhere else either.
                elem_id = 'calibre_pb_%d' % i
                elem.set('id', elem_id)
                xp = XPath("//*[@id='%s']" % elem_id)
        page_breaks_.append((xp, elem.get('pb_before', '0') == '1'))
        page_break_ids.append(elem_id)

    # Strip the bookkeeping attributes so they never reach the output.
    for elem in item.data.iter(etree.Element):
        elem.attrib.pop('pb_order', False)
        elem.attrib.pop('pb_before', False)

    return page_breaks_, page_break_ids
def find_page_breaks(self, item):
    """Find the elements in ``item`` at which CSS requests a page break.

    On the first call (while ``self.page_break_selectors`` is ``None``) every
    stylesheet in ``self.oeb.manifest`` is scanned.  For each rule whose
    ``page-break-before`` / ``page-break-after`` value is anything other than
    ``avoid``, ``auto`` or ``inherit``, the rule's selector is translated to
    an XPath object and cached on ``self`` as an ``(xpath, before)`` pair.
    When ``self.remove_css_pagebreaks`` is true the property is also removed
    from the rule.  Rules whose selector cannot be translated are skipped.

    Each cached selector is evaluated against the first ``<body>`` element;
    the body element itself is never treated as a break point.  A matched
    element without an ``id`` is given one of the form ``calibre_pb_<n>``.
    The temporary ``pb_order`` and ``pb_before`` attributes used for
    bookkeeping are stripped again before returning.

    :param item: object whose ``data`` attribute is the parsed document tree.
    :return: ``(page_breaks, page_break_ids)``.  ``page_breaks`` is a list of
        ``(xpath, before)`` pairs ordered by the elements' position in the
        tree, where ``xpath`` selects the element by id and ``before`` is
        True for a break before the element.  ``page_break_ids`` is the
        parallel list of ids.
    """
    if self.page_break_selectors is None:
        from calibre.ebooks.oeb.stylizer import fix_namespace
        css_to_xpath = HTMLTranslator().css_to_xpath
        # First call: harvest the selectors once and cache them on self.
        self.page_break_selectors = set()
        ignored_values = {'avoid', 'auto', 'inherit'}
        stylesheets = [x.data for x in self.oeb.manifest
                       if x.media_type in OEB_STYLES]
        for rule in rules(stylesheets):
            # Both values are read before either property can be removed.
            before = getattr(
                rule.style.getPropertyCSSValue('page-break-before'),
                'cssText', '').strip().lower()
            after = getattr(
                rule.style.getPropertyCSSValue('page-break-after'),
                'cssText', '').strip().lower()
            for prop, value, is_before in (
                    ('page-break-before', before, True),
                    ('page-break-after', after, False)):
                try:
                    if value and value not in ignored_values:
                        self.page_break_selectors.add((XPath(
                            fix_namespace(css_to_xpath(rule.selectorText))),
                            is_before))
                        if self.remove_css_pagebreaks:
                            rule.style.removeProperty(prop)
                except Exception:
                    # Deliberately best effort: a rule that cannot be
                    # processed is skipped instead of aborting the scan.
                    pass

    page_breaks = set()
    if self.page_break_selectors:
        # The body lookup does not depend on the selector, so do it once.
        body = item.data.xpath('//h:body', namespaces=NAMESPACES)
        if body:
            for selector, before in self.page_break_selectors:
                for elem in selector(body[0]):
                    if elem not in body:
                        elem.set('pb_before', '1' if before else '0')
                        page_breaks.add(elem)

    # Number every element so the unordered set can be sorted by position.
    for i, elem in enumerate(item.data.iter('*')):
        try:
            elem.set('pb_order', str(i))
        except TypeError:  # Cannot set attributes on comment nodes etc.
            continue

    ordered = sorted(page_breaks, key=lambda x: int(x.get('pb_order')))
    page_break_ids, page_breaks_ = [], []
    for i, elem in enumerate(ordered):
        elem_id = elem.get('id', 'calibre_pb_%d' % i)
        elem.set('id', elem_id)
        try:
            xp = XPath('//*[@id="%s"]' % elem_id)
        except Exception:
            try:
                xp = XPath("//*[@id='%s']" % elem_id)
            except Exception:
                # The id contains both a quote and an apostrophe (or is
                # otherwise unusable in an XPath literal).  Replace it, since
                # it is unlikely to work anywhere else either.
                elem_id = 'calibre_pb_%d' % i
                elem.set('id', elem_id)
                xp = XPath("//*[@id='%s']" % elem_id)
        page_breaks_.append((xp, elem.get('pb_before', '0') == '1'))
        page_break_ids.append(elem_id)

    # Only elements can carry the bookkeeping attributes, so only elements
    # need to be cleaned.
    for elem in item.data.iter('*'):
        elem.attrib.pop('pb_order', False)
        elem.attrib.pop('pb_before', False)

    return page_breaks_, page_break_ids
def find_page_breaks(self, item):
    """Return the CSS-requested page break points found in ``item``.

    While ``self.page_break_selectors`` is ``None`` (i.e. on the first call)
    all stylesheets in ``self.oeb.manifest`` are scanned.  Each rule whose
    ``page-break-before`` / ``page-break-after`` value is not ``avoid``,
    ``auto`` or ``inherit`` contributes a ``(selector text, before)`` pair to
    the set cached on ``self``; if ``self.remove_css_pagebreaks`` is true the
    property is removed from the rule as well.

    Candidates are limited to descendants of the first ``<body>`` element,
    excluding ``html``, ``body``, ``head``, ``style``, ``script``, ``meta``
    and ``link`` elements.  Matched elements lacking an ``id`` receive one of
    the form ``calibre_pb_<n>``.  The helper attributes ``pb_order`` and
    ``pb_before`` are removed from the tree before returning.

    :param item: object whose ``data`` attribute is the parsed document tree.
    :return: a 2-tuple ``(page_breaks, page_break_ids)``: a list of
        ``(xpath, before)`` pairs sorted by the elements' position in the
        tree (``before`` is True for a break before the element), and the
        matching list of element ids.  Both are empty when there are no
        selectors or the document has no ``<body>``.
    """
    if self.page_break_selectors is None:
        # Populate the per-instance selector cache exactly once.
        self.page_break_selectors = set()
        no_break_values = {'avoid', 'auto', 'inherit'}
        stylesheets = [
            x.data for x in self.oeb.manifest if x.media_type in OEB_STYLES
        ]
        for rule in rules(stylesheets):
            # Read both declarations before any of them may be removed.
            before = getattr(
                rule.style.getPropertyCSSValue('page-break-before'),
                'cssText', '').strip().lower()
            after = getattr(
                rule.style.getPropertyCSSValue('page-break-after'),
                'cssText', '').strip().lower()
            declarations = (
                ('page-break-before', before, True),
                ('page-break-after', after, False),
            )
            for prop, value, is_before in declarations:
                try:
                    if value and value not in no_break_values:
                        self.page_break_selectors.add(
                            (rule.selectorText, is_before))
                        if self.remove_css_pagebreaks:
                            rule.style.removeProperty(prop)
                except Exception:
                    # Best effort by design: skip a rule that cannot be
                    # handled rather than failing the whole scan.
                    pass

    if not self.page_break_selectors:
        # No selectors means no work: leave the document untouched.
        return [], []
    body = item.data.xpath('//h:body', namespaces=NAMESPACES)
    if not body:
        return [], []

    # Build the selector engine only after the cheap exits above.
    select = Select(item.data)
    descendants = frozenset(body[0].iterdescendants('*'))
    excluded_tags = {
        'html', 'body', 'head', 'style', 'script', 'meta', 'link'
    }

    page_breaks = set()
    for selector, before in self.page_break_selectors:
        try:
            for elem in select(selector):
                if (elem in descendants and
                        elem.tag.rpartition('}')[2].lower()
                        not in excluded_tags):
                    elem.set('pb_before', '1' if before else '0')
                    page_breaks.add(elem)
        except SelectorError as err:
            self.log.warn(
                'Ignoring page breaks specified with invalid CSS selector: %r (%s)'
                % (selector, as_unicode(err)))

    # Tag each element with its position so the matches can be ordered.
    for i, elem in enumerate(item.data.iter('*')):
        try:
            elem.set('pb_order', str(i))
        except TypeError:  # Cannot set attributes on comment nodes etc.
            continue

    ordered = sorted(page_breaks, key=lambda x: int(x.get('pb_order')))
    page_break_ids, page_breaks_ = [], []
    for i, elem in enumerate(ordered):
        elem_id = elem.get('id', 'calibre_pb_%d' % i)
        elem.set('id', elem_id)
        try:
            xp = XPath('//*[@id="%s"]' % elem_id)
        except Exception:
            try:
                xp = XPath("//*[@id='%s']" % elem_id)
            except Exception:
                # Neither quoting style works for this id (e.g. it holds both
                # a quote and an apostrophe).  Replace it, since it is
                # unlikely to work anywhere else either.
                elem_id = 'calibre_pb_%d' % i
                elem.set('id', elem_id)
                xp = XPath("//*[@id='%s']" % elem_id)
        page_breaks_.append((xp, elem.get('pb_before', '0') == '1'))
        page_break_ids.append(elem_id)

    # Remove the helper attributes so they never reach the output.
    for elem in item.data.iter(etree.Element):
        elem.attrib.pop('pb_order', False)
        elem.attrib.pop('pb_before', False)

    return page_breaks_, page_break_ids
def find_page_breaks(self, item):
    """Return the CSS-requested page break points found in ``item``.

    While ``self.page_break_selectors`` is ``None`` (i.e. on the first call)
    all stylesheets in ``self.oeb.manifest`` are scanned.  Each rule whose
    ``page-break-before`` / ``page-break-after`` value is not ``avoid``,
    ``auto`` or ``inherit`` has its selector translated to an XPath object,
    which is stored with a ``before`` flag in the set cached on ``self``.  If
    ``self.remove_css_pagebreaks`` is true the property is removed from the
    rule as well.  A rule whose selector cannot be translated is skipped.

    The cached selectors are run against the first ``<body>`` element; the
    body element itself is excluded from the matches.  Matched elements
    lacking an ``id`` receive one of the form ``calibre_pb_<n>``.  The helper
    attributes ``pb_order`` and ``pb_before`` are removed from the tree
    before returning.

    :param item: object whose ``data`` attribute is the parsed document tree.
    :return: a 2-tuple ``(page_breaks, page_break_ids)``: a list of
        ``(xpath, before)`` pairs sorted by the elements' position in the
        tree (``before`` is True for a break before the element), and the
        matching list of element ids.
    """
    if self.page_break_selectors is None:
        from calibre.ebooks.oeb.stylizer import fix_namespace
        css_to_xpath = HTMLTranslator().css_to_xpath
        # Populate the per-instance selector cache exactly once.
        self.page_break_selectors = set()
        no_break_values = {'avoid', 'auto', 'inherit'}
        stylesheets = [
            x.data for x in self.oeb.manifest if x.media_type in OEB_STYLES
        ]
        for rule in rules(stylesheets):
            # Read both declarations before any of them may be removed.
            before = getattr(
                rule.style.getPropertyCSSValue('page-break-before'),
                'cssText', '').strip().lower()
            after = getattr(
                rule.style.getPropertyCSSValue('page-break-after'),
                'cssText', '').strip().lower()
            declarations = (
                ('page-break-before', before, True),
                ('page-break-after', after, False),
            )
            for prop, value, is_before in declarations:
                try:
                    if value and value not in no_break_values:
                        compiled = XPath(
                            fix_namespace(css_to_xpath(rule.selectorText)))
                        self.page_break_selectors.add((compiled, is_before))
                        if self.remove_css_pagebreaks:
                            rule.style.removeProperty(prop)
                except Exception:
                    # Best effort by design: skip a rule that cannot be
                    # handled rather than failing the whole scan.
                    pass

    page_breaks = set()
    if self.page_break_selectors:
        # The body lookup is the same for every selector; evaluate it once.
        body = item.data.xpath('//h:body', namespaces=NAMESPACES)
        if body:
            for selector, before in self.page_break_selectors:
                for elem in selector(body[0]):
                    if elem not in body:
                        elem.set('pb_before', '1' if before else '0')
                        page_breaks.add(elem)

    # Tag each element with its position so the matches can be ordered.
    for i, elem in enumerate(item.data.iter('*')):
        try:
            elem.set('pb_order', str(i))
        except TypeError:  # Cannot set attributes on comment nodes etc.
            continue

    ordered = sorted(page_breaks, key=lambda x: int(x.get('pb_order')))
    page_break_ids, page_breaks_ = [], []
    for i, elem in enumerate(ordered):
        elem_id = elem.get('id', 'calibre_pb_%d' % i)
        elem.set('id', elem_id)
        try:
            xp = XPath('//*[@id="%s"]' % elem_id)
        except Exception:
            try:
                xp = XPath("//*[@id='%s']" % elem_id)
            except Exception:
                # Neither quoting style works for this id (e.g. it holds both
                # a quote and an apostrophe).  Replace it, since it is
                # unlikely to work anywhere else either.
                elem_id = 'calibre_pb_%d' % i
                elem.set('id', elem_id)
                xp = XPath("//*[@id='%s']" % elem_id)
        page_breaks_.append((xp, elem.get('pb_before', '0') == '1'))
        page_break_ids.append(elem_id)

    # The helper attributes were only ever set on elements, so restrict the
    # cleanup to elements too.
    for elem in item.data.iter('*'):
        elem.attrib.pop('pb_order', False)
        elem.attrib.pop('pb_before', False)

    return page_breaks_, page_break_ids