def folder(url, dirname):
    """Probe ``url + dirname`` and report the outcome as an HTML fragment.

    Args:
        url: Base URL. ``dirname`` is appended verbatim; no separator is
            inserted between the two.
        dirname: Name of the folder to look for.

    Returns:
        The ``htmlconv.h3`` heading for the folder, followed by either an
        ``htmlconv.perror`` fragment describing the HTTP error or an
        ``htmlconv.p`` fragment stating where the folder was found.

    Only ``urllib.error.HTTPError`` is handled here; any other failure
    raised by ``urlopen`` propagates to the caller.
    """
    location = url + dirname
    msg = htmlconv.h3('>-- ' + dirname)
    try:
        # Only reachability matters, so the response body is never read.
        # The context manager guarantees the connection is closed.
        with urllib.request.urlopen(location):
            pass
    except urllib.error.HTTPError as err:
        return msg + htmlconv.perror(dirname + ' ' + str(err))
    return msg + htmlconv.p('found at ' + location)
def file(url, filename):
    """Probe ``url + filename`` and report the outcome as an HTML fragment.

    Args:
        url: Base URL. ``filename`` is appended verbatim; no separator is
            inserted between the two.
        filename: Name of the file to look for.

    Returns:
        The ``htmlconv.h3`` heading for the file, followed by either an
        ``htmlconv.perror`` fragment describing the HTTP error or an
        ``htmlconv.p`` fragment stating where the file was found.

    Only ``urllib.error.HTTPError`` is handled here; any other failure
    raised by ``urlopen`` propagates to the caller.
    """
    location = url + filename
    msg = htmlconv.h3('>-- ' + filename + ' File')
    try:
        # Only reachability matters, so the response body is never read.
        # The context manager guarantees the connection is closed.
        with urllib.request.urlopen(location):
            pass
    except urllib.error.HTTPError as err:
        return msg + htmlconv.perror(filename + ' ' + str(err))
    # Report the full location that was actually requested (matching
    # folder()), not just the base URL.
    return msg + htmlconv.p('found at ' + location)
# Announce the URL at the head of the queue, then remove it from the queue.
output.append(htmlconv.h1('Scanning URL ' + str(urls[0])))
urls.pop(0)

# WARNING: change this loop with great care. Its filter is what keeps the
# crawl inside the starting URL; getting it wrong lets the scanner wander
# off onto external sites.
# NOTE(review): `url in href` is a substring test, so any link that merely
# contains the base URL somewhere in it also passes the filter.
for tag in soup.findAll('a', href=True):
    # Resolve relative links against the current page and store the
    # absolute form back on the tag.
    href = urljoin(url, tag['href'])
    tag['href'] = href
    if url not in href or href in visited:
        continue
    urls.append(href)
    visited.append(href)

output.append(htmlconv.h2('HTML Structure'))

# Exactly one <head> element is expected per page.
head_count = len(soup.findAll('head'))
if head_count == 0:
    head_report = htmlconv.perror('You should have a <code><head></code> section in your HTML5')
elif head_count == 1:
    head_report = htmlconv.psuccess('Single <code><head></code> section found for page; ✔')
else:
    head_report = htmlconv.perror('You should only have a single <code><head></code> section per page. You have used more than one on this page')
output.append(head_report)

# Plain counts of <meta> tags and of <link> tags that carry an href.
output.append(htmlconv.p(f"Meta:{len(soup.findAll('meta'))}"))
output.append(htmlconv.p(f"Links:{len(soup.findAll('link', href=True))}"))

# Exactly one <nav> element is expected per page.
nav_count = len(soup.findAll('nav'))
if nav_count == 0:
    nav_report = htmlconv.perror('You should have a nav section in your HTML5')
elif nav_count == 1:
    nav_report = htmlconv.psuccess('Single <code><nav></code> section found for page; ✔')
else:
    nav_report = htmlconv.perror('You should only have a single <code><nav></code> tag per page. You have used more than one on this page')
output.append(nav_report)