def setLandmarks(self, landmarks):
    """Swap the landmarks nav held in ``self.content`` for a rebuilt one.

    The replacement markup comes from ``self.buildLandmarks(landmarks)``.
    The current source is re-serialised with the whole
    ``<nav epub:type="landmarks">`` element collapsed into
    SIGIL_REPLACE_LANDMARKS_HERE, and the span matched by
    NAV_LANDMARKS_PATTERN (presumably that marker) is then substituted
    with the new markup.

    Returns True after ``self.content`` has been rewritten, or False,
    leaving ``self.content`` untouched, when the pattern finds nothing.
    """
    replacement = self.buildLandmarks(landmarks)
    parser = QuickXHTMLParser()
    parser.setContent(self.content)

    current_nav = None
    pieces = []
    suppress = False  # True while inside the nav that is being dropped
    for text, _path, tag, kind, attrs in parser.parse_iter():
        if text is not None:
            if not suppress:
                pieces.append(text)
            continue
        if tag == "nav":
            if kind == "begin":
                current_nav = attrs.get("epub:type", None)
                if current_nav == "landmarks":
                    # the marker stands in for the entire old element
                    pieces.append(SIGIL_REPLACE_LANDMARKS_HERE)
                    suppress = True
                    continue
            if kind == "end" and current_nav == "landmarks":
                current_nav = None
                suppress = False
                continue
        if not suppress:
            pieces.append(parser.tag_info_to_xml(tag, kind, attrs))

    marked_up = "".join(pieces)
    hit = re.search(NAV_LANDMARKS_PATTERN, marked_up)
    if hit is None:
        return False
    self.content = "".join(
        (marked_up[:hit.start()], replacement, marked_up[hit.end():])
    )
    return True
def setTOC(self, toclist):
    """Swap the table-of-contents nav in ``self.content`` for a rebuilt one.

    The replacement markup comes from ``self.buildTOC(toclist)``.  The
    current source is re-serialised with the whole
    ``<nav epub:type="toc">`` element collapsed into
    SIGIL_REPLACE_TOC_HERE, and the span matched by NAV_TOC_PATTERN
    (presumably that marker) is then substituted with the new markup.

    Returns True after ``self.content`` has been rewritten, or False,
    leaving ``self.content`` untouched, when the pattern finds nothing.
    """
    replacement = self.buildTOC(toclist)
    parser = QuickXHTMLParser()
    parser.setContent(self.content)

    current_nav = None
    pieces = []
    suppress = False  # True while inside the nav that is being dropped
    for text, _path, tag, kind, attrs in parser.parse_iter():
        if text is not None:
            if not suppress:
                pieces.append(text)
            continue
        if tag == "nav":
            if kind == "begin":
                current_nav = attrs.get("epub:type", None)
                if current_nav == "toc":
                    # the marker stands in for the entire old element
                    pieces.append(SIGIL_REPLACE_TOC_HERE)
                    suppress = True
                    continue
            if kind == "end" and current_nav == "toc":
                current_nav = None
                suppress = False
                continue
        if not suppress:
            pieces.append(parser.tag_info_to_xml(tag, kind, attrs))

    marked_up = "".join(pieces)
    hit = re.search(NAV_TOC_PATTERN, marked_up)
    if hit is None:
        return False
    self.content = "".join(
        (marked_up[:hit.start()], replacement, marked_up[hit.end():])
    )
    return True
def setPageList(self, pagelist):
    """Replace, or insert, the page-list nav held in ``self.content``.

    The replacement markup comes from ``self.buildPageList(pagelist)``.
    The current source is re-serialised with the whole
    ``<nav epub:type="page-list">`` element collapsed into
    SIGIL_REPLACE_PAGELIST_HERE.  When the document has no such nav and
    ``pagelist`` is non-empty, the marker is placed just before
    ``</body>`` instead.  The span matched by NAV_PAGELIST_PATTERN
    (presumably that marker) is then substituted with the new markup.

    Returns True after ``self.content`` has been rewritten, or False,
    leaving ``self.content`` untouched, when the pattern finds nothing
    (for instance no page-list nav exists and ``pagelist`` is empty).
    """
    pagelist_xhtml = self.buildPageList(pagelist)
    parser = QuickXHTMLParser()
    parser.setContent(self.content)

    current_nav = None
    pieces = []
    suppress = False  # True while inside the nav that is being dropped
    marker_placed = False
    for text, _path, tag, kind, attrs in parser.parse_iter():
        if text is not None:
            if not suppress:
                pieces.append(text)
            continue
        if tag == "nav" and kind == "begin":
            current_nav = attrs.get("epub:type", None)
            if current_nav == "page-list":
                # the marker stands in for the entire old element
                pieces.append(SIGIL_REPLACE_PAGELIST_HERE)
                marker_placed = True
                suppress = True
                continue
        if tag == "nav" and kind == "end" and current_nav == "page-list":
            current_nav = None
            suppress = False
            continue
        if tag == "body" and kind == "end":
            if not marker_placed and len(pagelist) > 0:
                # Repeat the whitespace that precedes </body> after the
                # marker so the new nav is laid out like its siblings.
                # Only a whitespace-only chunk may be repeated: copying a
                # tag or real text would duplicate it in the output, and
                # an empty output list has nothing to copy at all.
                tail = pieces[-1] if pieces else ""
                padding = tail if not tail.strip() else ""
                pieces.append(SIGIL_REPLACE_PAGELIST_HERE)
                pieces.append(padding)
                marker_placed = True
        if not suppress:
            pieces.append(parser.tag_info_to_xml(tag, kind, attrs))

    marked_up = "".join(pieces)
    hit = re.search(NAV_PAGELIST_PATTERN, marked_up)
    if hit is None:
        return False
    self.content = (
        marked_up[:hit.start()] + pagelist_xhtml + marked_up[hit.end():]
    )
    return True
def getTOC(self):
    """Collect the table-of-contents entries from ``self.content``.

    Returns a list of ``(play_order, level, href, title)`` tuples, one per
    ``<a>`` element closed inside ``<nav epub:type="toc">``:

    * ``play_order`` counts entries starting at 1,
    * ``level`` is the number of ``<ol>`` elements open at that point
      within the toc nav,
    * ``href`` is the anchor's ``href`` attribute exactly as written
      ("" when the attribute is missing),
    * ``title`` is the anchor text after ``xmldecode``.
    """
    parser = QuickXHTMLParser()
    parser.setContent(self.content)

    entries = []
    depth = 0
    order = 0
    label = ""
    current_nav = None
    target = None
    for text, path, tag, kind, attrs in parser.parse_iter():
        if text is not None:
            # Text counts towards a title only when its path places it
            # inside an <a>; any other text discards what was gathered.
            inside_anchor = ".a." in path or path.endswith(".a")
            label = label + text if inside_anchor else ""
            continue
        if tag == "nav":
            if kind == "begin":
                current_nav = attrs.get("epub:type", None)
                continue
            if kind == "end":
                current_nav = None
                continue
        if current_nav != "toc":
            continue
        if tag == "ol":
            if kind == "begin":
                depth += 1
            elif kind == "end":
                depth -= 1
            continue
        if tag != "a":
            continue
        if kind == "begin":
            # hrefs stay in their raw url-encoded form because they may
            # carry fragments
            target = attrs.get("href", "")
        elif kind == "end":
            order += 1
            label = xmldecode(label)
            entries.append((order, depth, target, label))
            label = ""
            target = None
    return entries
def getLandmarks(self):
    """Collect the landmark entries from ``self.content``.

    Returns a list of ``(epub_type, href, title)`` tuples, one per ``<a>``
    element inside ``<nav epub:type="landmarks">`` that carries an
    ``epub:type`` attribute; anchors without one are skipped.  ``href`` is
    the attribute exactly as written ("" when missing) and ``title`` is
    the anchor text after ``xmldecode``.
    """
    parser = QuickXHTMLParser()
    parser.setContent(self.content)

    found = []
    label = ""
    current_nav = None
    target = None
    anchor_type = None
    for text, path, tag, kind, attrs in parser.parse_iter():
        if text is not None:
            # Text counts towards a title only when its path places it
            # inside an <a>; any other text discards what was gathered.
            inside_anchor = ".a." in path or path.endswith(".a")
            label = label + text if inside_anchor else ""
            continue
        if tag == "nav":
            if kind == "begin":
                current_nav = attrs.get("epub:type", None)
                continue
            if kind == "end":
                current_nav = None
                continue
        if current_nav != "landmarks" or tag != "a":
            continue
        if kind == "begin":
            # hrefs stay in their raw url-encoded form because they may
            # carry fragments
            target = attrs.get("href", "")
            anchor_type = attrs.get("epub:type", None)
        elif kind == "end":
            if anchor_type is not None:
                label = xmldecode(label)
                found.append((anchor_type, target, label))
            # per-anchor state is cleared whether or not an entry was kept
            label = ""
            anchor_type = None
            target = None
    return found
def getPageList(self):
    """Collect the page-list entries from ``self.content``.

    Returns a list of ``(page_count, href, title)`` tuples, one per ``<a>``
    element closed inside ``<nav epub:type="page-list">``.  ``page_count``
    counts entries starting at 1, ``href`` is the anchor's ``href``
    attribute ("" when missing) passed through ``unquoteurl``, and
    ``title`` is the anchor text after ``xmldecode``.
    """
    parser = QuickXHTMLParser()
    parser.setContent(self.content)

    pages = []
    seen = 0
    current_nav = None
    target = None
    label = ""
    for text, path, tag, kind, attrs in parser.parse_iter():
        if text is not None:
            # Text counts towards a title only when its path places it
            # inside an <a>; any other text discards what was gathered.
            inside_anchor = ".a." in path or path.endswith(".a")
            label = label + text if inside_anchor else ""
            continue
        if tag == "nav":
            if kind == "begin":
                current_nav = attrs.get("epub:type", None)
                continue
            if kind == "end":
                current_nav = None
                continue
        if current_nav != "page-list" or tag != "a":
            continue
        if kind == "begin":
            target = unquoteurl(attrs.get("href", ""))
        elif kind == "end":
            seen += 1
            label = xmldecode(label)
            pages.append((seen, target, label))
            label = ""
    return pages