def scrape_from_nanapi_and_build_heading_tree():
    """Search nanapi for the submitted query and render each hit's heading tree.

    Reads the ``query`` field of the submitted form, fetches the nanapi
    search result page for that query, then fetches every article URL found
    on it, sets the article's title and builds its heading tree.

    Returns:
        The rendered ``headings_and_li_texts.tmpl`` template, which receives
        ``results``: one dict per article with the keys ``title``, ``nodes``
        and ``url``.
    """
    # Local, Python 2/3 compatible import so this block stays self-contained.
    try:
        from urllib.parse import quote
    except ImportError:
        from urllib import quote

    query = request.form['query']
    # Percent-encode the user-supplied term: spaces, '?', '#', '/' or
    # non-ASCII text would otherwise produce a malformed or truncated URL.
    # Encoding to UTF-8 bytes first keeps quote() working on both Python 2
    # and Python 3.
    encoded_query = quote(query.encode('utf-8'), safe='')
    query_url = 'http://nanapi.jp/search/q:' + encoded_query

    nanapi_search_result_page = WebPage(query_url)
    nanapi_search_result_page.fetch_html()
    urls = nanapi_search_result_page.find_urls_from_nanapi_search_result()

    results = []
    for url in urls:
        # result_page is a single nanapi article.
        result_page = WebPage(url)
        result_page.fetch_html()
        result_page.set_title()
        # Populates result_page.top_nodes, read below (presumably; WebPage
        # is defined elsewhere -- confirm).
        result_page.build_heading_tree()
        results.append({
            'title': result_page.title,
            'nodes': result_page.top_nodes,
            'url': result_page.url,
        })
    return render_template('headings_and_li_texts.tmpl', results=results)
def scrape_from_nanapi():
    """Search nanapi for the submitted query and render the tasks of each hit.

    Reads the ``query`` field of the submitted form, fetches the nanapi
    search result page for that query, then fetches every article URL found
    on it and extracts that article's task.

    Returns:
        The rendered ``nanapi_tasks.tmpl`` template, which receives
        ``tasks``: one entry per article, in search-result order.
    """
    # Local, Python 2/3 compatible import so this block stays self-contained.
    try:
        from urllib.parse import quote
    except ImportError:
        from urllib import quote

    query = request.form['query']
    # Percent-encode the user-supplied term: spaces, '?', '#', '/' or
    # non-ASCII text would otherwise produce a malformed or truncated URL.
    # Encoding to UTF-8 bytes first keeps quote() working on both Python 2
    # and Python 3.
    encoded_query = quote(query.encode('utf-8'), safe='')
    query_url = 'http://nanapi.jp/search/q:' + encoded_query

    nanapi_search_result_page = WebPage(query_url)
    nanapi_search_result_page.fetch_html()
    urls = nanapi_search_result_page.find_urls_from_nanapi_search_result()

    tasks = []
    for url in urls:
        # result_page is a single nanapi article.
        result_page = WebPage(url)
        result_page.fetch_html()
        # Per the original author's notes, a task is a list of task steps
        # and each step exposes `.h2` (a heading text) and `.h3s` (a list of
        # sub-heading texts) -- the step type is defined elsewhere; confirm.
        task = result_page.find_task_from_nanapi_with_headings()
        tasks.append(task)
    # tasks => [task, task, ...], so tasks[0][0].h2 would be the first
    # heading of the first article (again per the original notes).
    return render_template('nanapi_tasks.tmpl', tasks=tasks)