def generate_table_of_contents(soup, prefix):
    """Build a nested table of contents for the headers found in ``soup``.

    Every non-empty header matched by ``header_re`` is given an ``id``
    attribute (``soup`` is modified in place) and a matching
    ``<li class="ID"><a href="#ID">text</a></li>`` entry in the returned
    tree. Ids are derived from the header text, prefixed with ``prefix``,
    and are guaranteed to be unique among the ids generated by this call.

    :param soup: parsed document whose headers are indexed; also used as
        the parser argument for the newly created tags.
    :param prefix: string prepended (followed by ``_``) to each generated
        id to avoid clashing with ids elsewhere on the page.
    :returns: the ``<div class="toc">`` tag wrapping the nested ``<ul>``
        lists, or ``None`` when ``soup`` contains no matching headers.
    """
    # NOTE(review): header_re is defined elsewhere; the code below relies on
    # the last character of each matched tag name being a digit (h1..h6).
    headers = soup.findAll(header_re)
    if not headers:
        return None

    # Number of times each base id (before any numeric suffix) was seen.
    header_ids = Counter()
    # Every id actually handed out, so a numbered duplicate ("foo" -> "foo2")
    # can never collide with a header whose own text already yields "foo2".
    used_ids = set()

    tocdiv = Tag(soup, "div", [("class", "toc")])
    parent = Tag(soup, "ul")
    # Each <ul> remembers the header level that opened it; the root is 0.
    parent.level = 0
    tocdiv.append(parent)
    # How many nested <ul> lists deep ``parent`` currently is.
    level = 0
    # Header level of the previously processed header (0 = none yet).
    previous = 0

    for header in headers:
        contents = u''.join(header.findAll(text=True))

        # In the event of an empty header, skip
        if not contents:
            continue

        # Convert html entities to avoid ugly header ids
        aid = unicode(BeautifulSoup(
            contents, convertEntities=BeautifulSoup.XML_ENTITIES))
        # Prefix with PREFIX_ to avoid ID conflict with the rest of the page
        aid = u'%s_%s' % (prefix, aid.replace(" ", "_").lower())
        # Convert down to ascii replacing special characters with hex
        base_id = str(title_re.sub(lambda c: '.%X' % ord(c.group()), aid))

        # Only start numbering ids with the second instance of an id
        header_ids[base_id] += 1
        aid = base_id
        if header_ids[base_id] > 1:
            aid = '%s%d' % (base_id, header_ids[base_id])
        # Keep advancing the suffix until the id has not been issued before.
        while aid in used_ids:
            header_ids[base_id] += 1
            aid = '%s%d' % (base_id, header_ids[base_id])
        used_ids.add(aid)

        header['id'] = aid

        li = Tag(soup, "li", [("class", aid)])
        a = Tag(soup, "a", [("href", "#%s" % aid)])
        a.string = contents
        li.append(a)

        thislevel = int(header.name[-1])

        if previous and thislevel > previous:
            # Deeper header: open exactly one nested list, however many
            # header levels were skipped, and descend into it.
            newul = Tag(soup, "ul")
            newul.level = thislevel
            newli = Tag(soup, "li", [("class", "toc_child")])
            newli.append(newul)
            parent.append(newli)
            parent = newul
            level += 1
        elif level and thislevel < previous:
            # Shallower header: climb out of every list that was opened by
            # a deeper header level than this one.
            while level and parent.level > thislevel:
                parent = parent.findParent("ul")
                level -= 1

        previous = thislevel
        parent.append(li)

    return tocdiv
def generate_table_of_contents(soup, prefix):
    """Build a nested table of contents for the headers found in ``soup``.

    Every non-empty header matched by ``header_re`` is given an ``id``
    attribute (``soup`` is modified in place) and a matching
    ``<li class="ID"><a href="#ID">text</a></li>`` entry in the returned
    tree. Ids are derived from the header text, prefixed with ``prefix``,
    and are guaranteed to be unique among the ids generated by this call.

    :param soup: parsed document whose headers are indexed; also used as
        the parser argument for the newly created tags.
    :param prefix: string prepended (followed by ``_``) to each generated
        id to avoid clashing with ids elsewhere on the page.
    :returns: the ``<div class="toc">`` tag wrapping the nested ``<ul>``
        lists, or ``None`` when ``soup`` contains no matching headers.
    """
    # NOTE(review): header_re is defined elsewhere; the code below relies on
    # the last character of each matched tag name being a digit (h1..h6).
    headers = soup.findAll(header_re)
    if not headers:
        return None

    # Number of times each base id (before any numeric suffix) was seen.
    header_ids = Counter()
    # Every id actually handed out, so a numbered duplicate ("foo" -> "foo2")
    # can never collide with a header whose own text already yields "foo2".
    used_ids = set()

    tocdiv = Tag(soup, "div", [("class", "toc")])
    parent = Tag(soup, "ul")
    # Each <ul> remembers the header level that opened it; the root is 0.
    parent.level = 0
    tocdiv.append(parent)
    # How many nested <ul> lists deep ``parent`` currently is.
    level = 0
    # Header level of the previously processed header (0 = none yet).
    previous = 0

    for header in headers:
        contents = u''.join(header.findAll(text=True))

        # In the event of an empty header, skip
        if not contents:
            continue

        # Convert html entities to avoid ugly header ids
        aid = unicode(BeautifulSoup(
            contents, convertEntities=BeautifulSoup.XML_ENTITIES))
        # Prefix with PREFIX_ to avoid ID conflict with the rest of the page
        aid = u'%s_%s' % (prefix, aid.replace(" ", "_").lower())
        # Convert down to ascii replacing special characters with hex
        base_id = str(title_re.sub(lambda c: '.%X' % ord(c.group()), aid))

        # Only start numbering ids with the second instance of an id
        header_ids[base_id] += 1
        aid = base_id
        if header_ids[base_id] > 1:
            aid = '%s%d' % (base_id, header_ids[base_id])
        # Keep advancing the suffix until the id has not been issued before.
        while aid in used_ids:
            header_ids[base_id] += 1
            aid = '%s%d' % (base_id, header_ids[base_id])
        used_ids.add(aid)

        header['id'] = aid

        li = Tag(soup, "li", [("class", aid)])
        a = Tag(soup, "a", [("href", "#%s" % aid)])
        a.string = contents
        li.append(a)

        thislevel = int(header.name[-1])

        if previous and thislevel > previous:
            # Deeper header: open exactly one nested list, however many
            # header levels were skipped, and descend into it.
            newul = Tag(soup, "ul")
            newul.level = thislevel
            newli = Tag(soup, "li", [("class", "toc_child")])
            newli.append(newul)
            parent.append(newli)
            parent = newul
            level += 1
        elif level and thislevel < previous:
            # Shallower header: climb out of every list that was opened by
            # a deeper header level than this one.
            while level and parent.level > thislevel:
                parent = parent.findParent("ul")
                level -= 1

        previous = thislevel
        parent.append(li)

    return tocdiv