def url_handler():
    """Fetch the user-supplied URL and render its HTML with tag counts.

    Reads the ``url`` query-string argument, downloads the page, counts
    its tags, escapes the markup so it can be shown as text, wraps the
    tags in spans for highlighting, and hands everything to the
    ``results.html`` template.

    Returns:
        The rendered ``results.html`` template on success, or a plain
        error-message string when the page cannot be fetched.
    """
    url = request.args.get('url')
    try:
        # Bound the wait: without a timeout an unresponsive host would
        # block this handler indefinitely. A timeout raises
        # requests.exceptions.Timeout, a RequestException subclass.
        html_object = requests.get(url, timeout=10)
    except requests.exceptions.RequestException:
        # Adjacent literals are used instead of a backslash continuation
        # inside the string, which leaked source whitespace into the text.
        return ('The URL you entered is invalid, or is having trouble '
                'connecting at this time. Please enter a new URL.')

    # Decode the requests response into unicode HTML.
    html = get_html(html_object)
    # Create the dictionary of tags and counts from the unescaped HTML.
    tags = create_count(html)
    # Escape <, > and & characters so the HTML can be displayed as text.
    html = encode_html(html)
    # Wrap individual tags with spans so they can be highlighted.
    html = add_spans(html)
    return render_template('results.html', url=url, html=html, tags=tags)
def fetch_html():
    """Fetch the submitted URL and render its HTML with an element histogram.

    Reads ``input_url`` from the submitted form, downloads the page,
    escapes the markup for display, wraps elements in spans, builds the
    element histogram from the parsed tree, and renders ``results.html``.

    Returns:
        A redirect to ``/`` (after flashing an error message) when the
        page cannot be fetched; otherwise the rendered ``results.html``
        template.
    """
    input_url = request.form.get('input_url')
    try:
        # Fetch HTML of the input URL. The timeout keeps an unresponsive
        # host from blocking this handler indefinitely.
        page = requests.get(input_url, timeout=10)
    except requests.exceptions.RequestException:
        # RequestException is the base class of every requests error, so
        # this also covers a missing or unsupported scheme, timeouts and
        # redirect loops, which previously escaped as unhandled errors.
        flash('The URL you entered is either invalid or unavailable. Try again!')
        return redirect('/')

    # Response body as unicode.
    html = page.text

    # Replace <, > with HTML entities to display on page.
    raw_html = encode_html(html)

    # Add spans to each element so jQuery can select and apply the
    # highlight class. The result has to be passed as a Markup object to
    # display properly.
    span_html = Markup(add_spans(raw_html))

    # Convert the HTML unicode to an lxml tree and build the histogram.
    tree = lxml.html.fromstring(html)
    frequency = build_element_histogram(tree)

    # Keep track of the URL without its scheme prefix. Only a prefix that
    # is actually present is stripped, so https:// URLs are not mangled.
    display_url = input_url
    for prefix in ('http://', 'https://'):
        if display_url.lower().startswith(prefix):
            display_url = display_url[len(prefix):]
            break

    return render_template('results.html',
                           frequency=frequency,
                           raw_html=span_html,
                           website=display_url)