def get_document_to_index(self, route):
    """Render a route as the Guest user and extract its indexable text.

    Args:
        route (str): route of the page to render and parse

    Returns:
        frappe._dict | None: a dict with ``title``, ``content`` and ``path``,
        or ``None`` when rendering or parsing raised an exception.
    """
    frappe.set_user("Guest")
    frappe.local.no_cache = True

    try:
        set_request(method="GET", path=route)
        soup = BeautifulSoup(render_page(route), "html.parser")

        # Only the element carrying the "page_content" class is indexed.
        body = soup.find(class_="page_content")
        text_content = body.text if body else ""

        # Fall back to the route when the rendered page has no <title>.
        if soup.title:
            title = soup.title.text.strip()
        else:
            title = route

        return frappe._dict(title=title, content=text_content, path=route)
    except Exception:
        # Best-effort: a route that fails to render or parse yields no document.
        return None
    finally:
        # Runs on both the success and the failure path.
        frappe.set_user("Administrator")
def add_route_to_global_search(route):
    """Render ``route`` as the Guest user and hand its text to the search queue.

    The page is rendered, parsed with BeautifulSoup, and the text of the
    element with class ``page_content`` (empty string when absent) is passed
    to ``sync_value_in_queue`` as a "Static Web Page" entry. The page title
    falls back to the route when the rendered page has no ``<title>``.

    Args:
        route (str): route of the page to render and index

    Returns:
        None. Failures while rendering, parsing or queueing are suppressed,
        so a route that cannot be processed is simply not queued.
    """
    from frappe.website.render import render_page
    from frappe.tests.test_website import set_request

    frappe.set_user('Guest')
    frappe.local.no_cache = True

    try:
        set_request(method='GET', path=route)
        content = render_page(route)
        soup = BeautifulSoup(content, 'html.parser')
        page_content = soup.find(class_='page_content')
        text_content = page_content.text if page_content else ''
        title = soup.title.text.strip() if soup.title else route

        value = dict(
            doctype='Static Web Page',
            name=route,
            content=text_content,
            published=1,
            title=title,
            route=route,
        )
        sync_value_in_queue(value)
    except Exception:
        # Deliberate best-effort: `Exception` already covers PermissionError,
        # DoesNotExistError and ValidationError, so listing them was redundant.
        pass
    finally:
        # Restore the user even when a BaseException (e.g. KeyboardInterrupt)
        # propagates; previously the session was left as Guest in that case.
        frappe.set_user('Administrator')
def add_route_to_global_search(route):
    """Render ``route`` as the Guest user and hand its text to the search queue.

    The page is rendered, parsed with BeautifulSoup, and the text of the
    element with class ``page_content`` (empty string when absent) is passed
    to ``sync_value_in_queue`` as a "Static Web Page" entry. The page title
    falls back to the route when the rendered page has no ``<title>``.

    Args:
        route (str): route of the page to render and index

    Returns:
        None. Failures while rendering, parsing or queueing are suppressed,
        so a route that cannot be processed is simply not queued.
    """
    from bs4 import BeautifulSoup

    from frappe.utils import set_request
    from frappe.website.render import render_page

    frappe.set_user("Guest")
    frappe.local.no_cache = True

    try:
        set_request(method="GET", path=route)
        content = render_page(route)
        soup = BeautifulSoup(content, "html.parser")
        page_content = soup.find(class_="page_content")
        text_content = page_content.text if page_content else ""
        title = soup.title.text.strip() if soup.title else route

        value = dict(
            doctype="Static Web Page",
            name=route,
            content=text_content,
            published=1,
            title=title,
            route=route,
        )
        sync_value_in_queue(value)
    except Exception:
        # Deliberate best-effort: `Exception` already covers PermissionError,
        # DoesNotExistError and ValidationError, so listing them was redundant.
        pass
    finally:
        # Restore the user even when a BaseException (e.g. KeyboardInterrupt)
        # propagates; previously the session was left as Guest in that case.
        frappe.set_user("Administrator")
def sync_global_search():
    '''Rebuild the "Static Web Page" entries of the global search index.

    Pending entries in ``frappe.flags.update_global_search`` are flushed
    first, the existing "Static Web Page" rows are deleted, and every
    ``.html`` / ``.md`` file under each installed app's web folders is
    rendered and queued again. Pages that raise while being rendered or
    parsed are skipped.
    '''
    from frappe.website.render import render_page
    # Inside this function the name below refers to the imported helper,
    # not to this function itself.
    from frappe.utils.global_search import sync_global_search
    from bs4 import BeautifulSoup

    if frappe.flags.update_global_search:
        sync_global_search()
    frappe.flags.update_global_search = []
    # NOTE(review): this literal looks masked/redacted -- confirm the intended user.
    frappe.session.user = '******'
    frappe.local.no_cache = True

    # NOTE(review): the double-quoted string literal presumably relies on
    # MySQL/MariaDB quoting rules -- confirm for other database backends.
    frappe.db.sql(
        'delete from __global_search where doctype="Static Web Page"')

    for app in frappe.get_installed_apps(frappe_last=True):
        app_path = frappe.get_app_path(app)
        start_folders = frappe.local.flags.web_pages_folders or ('www', 'templates/pages')
        for start in start_folders:
            start_dir = os.path.join(app_path, start)
            for basepath, _subdirs, files in os.walk(start_dir):
                for filename in files:
                    if not filename.endswith(('.html', '.md')):
                        continue

                    # Path of the page without its file extension.
                    path = os.path.join(basepath, filename.rsplit('.', 1)[0])
                    try:
                        content = render_page(path)
                        soup = BeautifulSoup(content, 'html.parser')
                        route = os.path.relpath(path, start_dir)
                        text = ''.join(
                            div.text
                            for div in soup.findAll("div", {'class': 'page-content'})
                        )
                        frappe.flags.update_global_search.append(
                            dict(doctype='Static Web Page',
                                 name=route,
                                 content=frappe.unicode(text),
                                 published=1,
                                 title=soup.title.string,
                                 route=route))
                    except Exception:
                        # Best-effort: skip pages that fail to render or parse.
                        pass

    sync_global_search()
def sync_global_search():
    '''Rebuild the "Static Web Page" entries of the global search index.

    Pending entries in ``frappe.flags.update_global_search`` are flushed
    first, the existing "Static Web Page" rows are deleted, and every
    ``.html`` / ``.md`` file under each installed app's start folders is
    rendered and queued again. Pages that raise while being rendered or
    parsed are skipped.
    '''
    from frappe.website.render import render_page
    # Inside this function the name below refers to the imported helper,
    # not to this function itself.
    from frappe.utils.global_search import sync_global_search
    from bs4 import BeautifulSoup

    if frappe.flags.update_global_search:
        sync_global_search()
    frappe.flags.update_global_search = []
    # NOTE(review): this literal looks masked/redacted -- confirm the intended user.
    frappe.session.user = '******'
    frappe.local.no_cache = True

    # NOTE(review): the double-quoted string literal presumably relies on
    # MySQL/MariaDB quoting rules -- confirm for other database backends.
    frappe.db.sql('delete from __global_search where doctype="Static Web Page"')

    for app in frappe.get_installed_apps(frappe_last=True):
        app_path = frappe.get_app_path(app)
        for start in get_start_folders():
            start_dir = os.path.join(app_path, start)
            for basepath, _subdirs, files in os.walk(start_dir):
                for filename in files:
                    if not filename.endswith(('.html', '.md')):
                        continue

                    # Path of the page without its file extension.
                    path = os.path.join(basepath, filename.rsplit('.', 1)[0])
                    try:
                        content = render_page(path)
                        soup = BeautifulSoup(content, 'html.parser')
                        route = os.path.relpath(path, start_dir)
                        text = ''.join(
                            div.text
                            for div in soup.findAll("div", {'class':'page-content'})
                        )
                        entry = dict(
                            doctype='Static Web Page',
                            name=route,
                            content=text_type(text),
                            published=1,
                            title=text_type(soup.title.string),
                            route=route,
                        )
                        frappe.flags.update_global_search.append(entry)
                    except Exception:
                        # Best-effort: skip pages that fail to render or parse.
                        pass

    sync_global_search()
def get_document_to_index(route):
    """Render a route as the Guest user and extract its indexable text.

    Args:
        route (str): route of the page to render and parse

    Returns:
        frappe._dict | None: a dict with ``title``, ``content`` and ``path``,
        or ``None`` when rendering or parsing raised an exception.
    """
    frappe.set_user("Guest")
    frappe.local.no_cache = True

    try:
        set_request(method="GET", path=route)
        content = render_page(route)
        soup = BeautifulSoup(content, "html.parser")
        # Only the element carrying the "page_content" class is indexed.
        page_content = soup.find(class_="page_content")
        text_content = page_content.text if page_content else ""
        # Fall back to the route when the rendered page has no <title>.
        title = soup.title.text.strip() if soup.title else route
        return frappe._dict(title=title, content=text_content, path=route)
    except Exception:
        # Deliberate best-effort: `Exception` already covers PermissionError,
        # DoesNotExistError and ValidationError, so listing them was redundant.
        return None
    finally:
        # Previously the user was restored only on the success path, so any
        # failure left the session running as Guest.
        frappe.set_user("Administrator")
def test_error_page_rendering(self):
    """Rendering the "error" route yields output containing "Error"."""
    rendered = render_page("error")
    self.assertIn("Error", rendered)