def main(url, resolve_dupes=True):
    """Brute-force the two-character suffix space of a shortened URL.

    Strips the last TWO characters of the passed `url`, then tries every
    two-character combination drawn from CHARSET appended to the
    remaining prefix, resolving each candidate and recording any that do
    not come back 404.

    (The original docstring claimed only the last letter was dropped;
    the code has always stripped two — `url[:-2]` — to match the two
    nested CHARSET loops.)

    :param url: `string` of the base URL to start with
    :param resolve_dupes: `Boolean` whether or not to re-process
        existing entries.
    :rtype: None
    """
    db = init_db_conn()
    base_url = url[:-2]
    for first in CHARSET:
        for second in CHARSET:
            # Build the candidate in a fresh local instead of rebinding
            # the `url` parameter (the original shadowed its own input).
            candidate = '%s%s%s' % (base_url, chr(first), chr(second))
            # If skipping existing entries, check for this URL and skip
            # if we already have it
            if not resolve_dupes:
                existing = get_result(db, candidate)
                if existing:
                    continue
            bitly = resolve_url(candidate)
            if bitly.status != 404:
                sys.stdout.write('%s\t%s\n' % (bitly.content_type,
                                               bitly.path[-1]))
                save_result(db, bitly)
def do_GET(self):
    """Serve one GET request with an HTML listing of stored results.

    Routes on the exact request path:
      /images      -> inline <img> tags for every image/* result
      /nonhtml     -> status-200 results whose content type isn't text/html
      /nonhtml-all -> results of any status whose content type isn't text/html
      /all         -> every stored result
      anything else-> all status-200 results
    """
    # Not the most efficient way to connect to the DB, would be
    # better to connect once and retain, but, I'm lazy.
    db = init_db_conn()

    # Pop off headers
    self.send_response(200)
    self.send_header("Content-type", "text/html")
    self.end_headers()

    out = self.wfile.write
    out("<html><head><title>bit.ly grinder</title></head>")
    out("<body><ul>")

    if self.path == '/images':
        # Show all matching images
        for res in get_results_by_content_type(db, content_type='image/%'):
            out('<br><img src="%s" />%s - %s<br>\n'
                % (res.path[-1], res.path[0], res.path[-1]))
    else:
        # Every other route reduces to a get_results() query whose
        # rows are rendered by print_links().
        if self.path == '/nonhtml':
            results = get_results(db, status=200,
                                  exclude_content='text/html')
        elif self.path == '/nonhtml-all':
            results = get_results(db, exclude_content='text/html')
        elif self.path == '/all':
            results = get_results(db)
        else:
            results = get_results(db, status=200)
        self.print_links(results)

    out("</ul></body></html>")