def get_pdf_content(pages, toc):
    """
    Build the HTML of the combined document rendered through ``pdf.html``.

    Every page referenced from ``toc`` is parsed; its element ids and its
    scheme-less links are prefixed with the page id so they stay unique once
    all pages share one document, and every ``h<digit>`` tag is renamed one
    level down.

    :type pages: flask.ext.flatpages.flatpages.FlatPages
    :param pages: page collection; pages are looked up with ``pages.get(url)``
        and references whose page is missing are skipped
    :param toc: iterable of sections, each with a ``"title"`` and a list of
        ``"items"`` whose entries carry a ``"url"``
    :return: the rendered ``pdf.html`` template
    """
    # Raw string: "\d" inside a plain literal is an invalid escape sequence.
    # Compiled once here instead of once per visited element.
    header_regex = re.compile(r"^h(\d)$")

    content = []
    for toc_section in toc:
        section = {
            "id": toc_section["title"].replace(" ", "_"),
            "title": toc_section["title"],
            "content": [],
        }
        for reference in toc_section["items"]:
            # Normalise "/docs/foo.html" to "docs/foo" before the lookup.
            url = reference["url"]
            if url.startswith("/"):
                url = url[1:]
            if url.endswith(".html"):
                url = url[:-5]

            if url == "docs/reference/grammar":
                # The grammar page is rendered from its template rather than
                # looked up in ``pages``.
                page_html = render_template(
                    "pages/grammar.html", kotlinGrammar=get_grammar()
                ).replace("<br>", "<br/>")
                document = BeautifulSoup(page_html, "html.parser")
                document = document.find("div", {"class": "grammar"})
                page_id = "grammar"
                title = "Grammar"
            else:
                page = pages.get(url)
                if page is None:
                    continue
                title = page.meta["title"]
                document = BeautifulSoup(page.html, "html.parser")
                page_id = page.path.split("/")[-1]

            for element in document.find_all():
                if "id" in element.attrs:
                    element.attrs["id"] = page_id + "_" + element.attrs["id"]

                if element.name == "a":
                    if "href" not in element.attrs:
                        continue
                    href = element.attrs["href"]
                    parsed_href = urlparse(href)
                    # Only scheme-less links are rewritten into in-document
                    # anchors; links with a scheme are left untouched.
                    if parsed_href.scheme == "":
                        if href.startswith("#"):
                            new_href = page_id + "_" + href[1:]
                        else:
                            # Ids are prefixed with the LAST path segment of
                            # the page (see page_id above), so the link must
                            # use the last segment too, not the whole path.
                            url_path = parsed_href.path.split("/")[-1]
                            if url_path.endswith(".html"):
                                url_path = url_path[:-5]
                            new_href = url_path
                            if parsed_href.fragment != "":
                                new_href += "_" + parsed_href.fragment
                        element.attrs["href"] = "#" + new_href

                # Rename h<N> to h<N+1>.
                header_match = header_regex.match(element.name)
                if header_match:
                    level = int(header_match.group(1)) + 1
                    element.name = "h" + str(level)

            section["content"].append({
                "id": page_id,
                "title": title,
                "content": document.decode(),
            })
        content.append(section)

    # root_folder is root_folder_path with backslashes turned into slashes.
    drive, root_folder_path_rest = path.splitdrive(root_folder_path)
    page_html = render_template(
        "pdf.html",
        content=content,
        root_folder=(drive + root_folder_path_rest).replace("\\", "/"),
    )
    return page_html
def get_pdf_content(build_mode: bool, pages: MyFlatPages, toc: Dict) -> str:
    """Build the HTML of the combined document rendered through ``pdf.html``.

    Every page referenced from ``toc`` is parsed; its element ids and its
    scheme-less links are prefixed with the page id so they stay unique once
    all pages share one document, and every ``h<digit>`` tag is renamed one
    level down.

    :param build_mode: passed through to ``get_grammar`` when the grammar
        page is rendered
    :param pages: page collection; pages are looked up with ``pages.get(url)``
        and references whose page is missing are skipped
    :param toc: mapping whose ``'content'`` lists sections; each section has
        a ``'title'`` and a ``'content'`` list of references carrying a
        ``'url'``
    :return: the rendered ``pdf.html`` template
    """
    # Raw string: "\d" inside a plain literal is an invalid escape sequence.
    # Compiled once here instead of once per visited element.
    header_regex = re.compile(r'^h(\d)$')

    content = []
    for toc_section in toc['content']:
        section = {
            'id': toc_section['title'].replace(' ', '_'),
            'title': toc_section['title'],
            'content': [],
        }
        for reference in toc_section['content']:
            # Normalise "/docs/foo.html" to "docs/foo" before the lookup.
            url = reference['url']
            if url.startswith('/'):
                url = url[1:]
            if url.endswith('.html'):
                url = url[:-5]

            if url == "docs/reference/grammar":
                # The grammar page is rendered from its template rather than
                # looked up in ``pages``.
                page_html = render_template(
                    'pages/grammar.html',
                    kotlinGrammar=get_grammar(build_mode),
                ).replace("<br>", "<br/>")
                document = BeautifulSoup(page_html, 'html.parser')
                document = document.find("div", {"class": "grammar"})
                page_id = "grammar"
                title = "Grammar"
            else:
                page = pages.get(url)
                if page is None:
                    continue
                title = page.meta['title']
                document = BeautifulSoup(page.html, 'html.parser')
                page_id = page.path.split('/')[-1]

            for element in document.find_all():
                if 'id' in element.attrs:
                    element.attrs['id'] = page_id + '_' + element.attrs['id']

                if element.name == "a":
                    if 'href' not in element.attrs:
                        continue
                    href = element.attrs['href']
                    # Separate name so the page url above is not shadowed.
                    parsed_href = urlparse(href)
                    # Only scheme-less links are rewritten into in-document
                    # anchors; links with a scheme are left untouched.
                    if parsed_href.scheme == "":
                        if href.startswith('#'):
                            new_href = page_id + '_' + href[1:]
                        else:
                            # Ids are prefixed with the last path segment of
                            # the page, so links use the last segment too.
                            url_path = parsed_href.path.split("/")[-1]
                            if url_path.endswith(".html"):
                                url_path = url_path[:-5]
                            new_href = url_path
                            if parsed_href.fragment != "":
                                new_href += '_' + parsed_href.fragment
                        element.attrs['href'] = "#" + new_href

                # Rename h<N> to h<N+1>.
                header_match = header_regex.match(element.name)
                if header_match:
                    level = int(header_match.group(1)) + 1
                    element.name = 'h' + str(level)

            section['content'].append({
                'id': page_id,
                'title': title,
                'content': document.decode(),
            })
        content.append(section)

    # root_folder is root_folder_path with backslashes turned into slashes.
    drive, root_folder_path_rest = path.splitdrive(root_folder_path)
    page_html = render_template(
        'pdf.html',
        content=content,
        root_folder=(drive + root_folder_path_rest).replace('\\', '/'),
    )
    return page_html
def grammar():
    """Render the grammar page from the result of ``get_grammar(build_mode)``.

    Returns the rendered ``pages/grammar.html`` template, or the pair
    ``("Grammar file not found", 404)`` when ``get_grammar`` returns ``None``.
    """
    kotlin_grammar = get_grammar(build_mode)
    if kotlin_grammar is not None:
        return render_template('pages/grammar.html', kotlinGrammar=kotlin_grammar)
    return "Grammar file not found", 404
def get_pdf_content(pages: MyFlatPages, toc: Dict) -> str:
    """Build the HTML of the combined document rendered through ``pdf.html``.

    Every page referenced from ``toc`` is parsed; its element ids and its
    scheme-less links are prefixed with the page id so they stay unique once
    all pages share one document, and every ``h<digit>`` tag is renamed one
    level down.

    :param pages: page collection; pages are looked up with ``pages.get(url)``
        and references whose page is missing are skipped
    :param toc: mapping whose ``'content'`` lists sections; each section has
        a ``'title'`` and a ``'content'`` list of references carrying a
        ``'url'``
    :return: the rendered ``pdf.html`` template
    """
    # Raw string: "\d" inside a plain literal is an invalid escape sequence.
    # Compiled once here instead of once per visited element.
    header_regex = re.compile(r'^h(\d)$')

    content = []
    for toc_section in toc['content']:
        section = {
            'id': toc_section['title'].replace(' ', '_'),
            'title': toc_section['title'],
            'content': [],
        }
        for reference in toc_section['content']:
            # Normalise "/docs/foo.html" to "docs/foo" before the lookup.
            url = reference['url']
            if url.startswith('/'):
                url = url[1:]
            if url.endswith('.html'):
                url = url[:-5]

            if url == "docs/reference/grammar":
                # The grammar page is rendered from its template rather than
                # looked up in ``pages``.
                page_html = render_template(
                    'pages/grammar.html', kotlinGrammar=get_grammar()
                ).replace("<br>", "<br/>")
                document = BeautifulSoup(page_html, 'html.parser')
                document = document.find("div", {"class": "grammar"})
                page_id = "grammar"
                title = "Grammar"
            else:
                page = pages.get(url)
                if page is None:
                    continue
                title = page.meta['title']
                document = BeautifulSoup(page.html, 'html.parser')
                page_id = page.path.split('/')[-1]

            for element in document.find_all():
                if 'id' in element.attrs:
                    element.attrs['id'] = page_id + '_' + element.attrs['id']

                if element.name == "a":
                    if 'href' not in element.attrs:
                        continue
                    href = element.attrs['href']
                    # Separate name so the page url above is not shadowed.
                    parsed_href = urlparse(href)
                    # Only scheme-less links are rewritten into in-document
                    # anchors; links with a scheme are left untouched.
                    if parsed_href.scheme == "":
                        if href.startswith('#'):
                            new_href = page_id + '_' + href[1:]
                        else:
                            # Ids are prefixed with the last path segment of
                            # the page, so links use the last segment too.
                            url_path = parsed_href.path.split("/")[-1]
                            if url_path.endswith(".html"):
                                url_path = url_path[:-5]
                            new_href = url_path
                            if parsed_href.fragment != "":
                                new_href += '_' + parsed_href.fragment
                        element.attrs['href'] = "#" + new_href

                # Rename h<N> to h<N+1>.
                header_match = header_regex.match(element.name)
                if header_match:
                    level = int(header_match.group(1)) + 1
                    element.name = 'h' + str(level)

            section['content'].append({
                'id': page_id,
                'title': title,
                'content': document.decode(),
            })
        content.append(section)

    # root_folder is root_folder_path with backslashes turned into slashes.
    drive, root_folder_path_rest = path.splitdrive(root_folder_path)
    page_html = render_template(
        'pdf.html',
        content=content,
        root_folder=(drive + root_folder_path_rest).replace('\\', '/'),
    )
    return page_html
def grammar():
    """Render the grammar page from the result of ``get_grammar()``.

    Returns the rendered ``pages/grammar.html`` template, or the pair
    ``("Grammar file not found", 404)`` when ``get_grammar`` returns ``None``.
    """
    kotlin_grammar = get_grammar()
    if kotlin_grammar is not None:
        return render_template('pages/grammar.html', kotlinGrammar=kotlin_grammar)
    return "Grammar file not found", 404