def alexa_rank_scanner(website):
    """Build the scanner attribute for the site's Alexa global rank.

    The rank is not computed here: it is read from ``website.alexa_rank``
    and handed to ``bin_numeric`` together with the thresholds below.

    :param website: website to scan
    :return ScannerAttribute:
    """
    # NOTE(review): an earlier comment here sketched the mapping as
    # "0 - 99: 0, 100 - 499: 1, ...", which does not line up with these
    # thresholds. The exact boundary handling lives in bin_numeric --
    # confirm there before relying on either description.
    thresholds = [
        1000, 5000, 10000, 25000, 50000, 100000, 250000, sys.maxsize,
    ]
    global_rank = website.alexa_rank
    binned = bin_numeric(thresholds, global_rank)
    return ScannerAttribute('alexa_rank', global_rank, binned, thresholds)
def html5_tag_scanner(website):
    """Count how many HTML 5 tags occur on the website.

    Every tag name listed in ``html5_tags`` is looked up with
    ``website.soup.find_all`` and the number of matches is added up.

    :param website: website to scan
    :return ScannerAttribute:
    """
    # Total number of matches across all tag names in html5_tags.
    total = sum(len(website.soup.find_all(name)) for name in html5_tags)
    thresholds = [1, 10, 50, sys.maxsize]
    binned = bin_numeric(thresholds, total)
    return ScannerAttribute('html5_tags', total, binned, thresholds)
def page_rank_scanner(website):
    """Expose the website's PageRank as a scanner attribute.

    The value is already stored on the website object as
    ``google_page_rank``; this scanner only exists so that it takes part
    in the preprocessing like every other attribute.

    :param website: website to scan
    :return ScannerAttribute:
    """
    # One threshold per integer 1..10, closed off by sys.maxsize.
    thresholds = list(range(1, 11)) + [sys.maxsize]
    page_rank = website.google_page_rank
    binned = bin_numeric(thresholds, page_rank)
    return ScannerAttribute('page_rank', page_rank, binned, thresholds)
def alexa_load_time_scanner(website):
    """Build the scanner attribute for the Alexa average load time.

    The value is read from ``website.alexa_load_time`` and binned with
    ``bin_numeric`` against the thresholds below.

    :param website: website to scan
    :return ScannerAttribute:
    """
    # NOTE(review): thresholds look like milliseconds -- confirm the unit
    # of website.alexa_load_time.
    thresholds = [500, 1000, 1500, 2000, 2500, sys.maxsize]
    average_load_time = website.alexa_load_time
    binned = bin_numeric(thresholds, average_load_time)
    return ScannerAttribute(
        'alexa_load_time', average_load_time, binned, thresholds
    )
def alexa_rank_dk_scanner(website):
    """Build the scanner attribute for the site's Alexa Danish rank.

    The rank is read from ``website.alexa_rank_dk`` and binned with
    ``bin_numeric`` against the thresholds below.

    :param website: website to scan
    :return ScannerAttribute:
    """
    thresholds = [10, 50, 250, 1000, 5000, sys.maxsize]
    danish_rank = website.alexa_rank_dk
    binned = bin_numeric(thresholds, danish_rank)
    return ScannerAttribute('alexa_rank_dk', danish_rank, binned, thresholds)