def create_bucket_from_domain(seed_domain, total_sites=False):
    """Create an ad bucket of ranked sites related to ``seed_domain``.

    Candidates are gathered with ``find_by_html`` and
    ``find_by_similarsites`` (HTML matches are processed first). The seed
    domain itself is left out, and a domain is kept only when ``get_rank``
    returns a truthy value for it. Each domain appears at most once, even
    when both lookups report it.

    Args:
        seed_domain: Domain used as the starting point for both lookups.
        total_sites: Number of rows to print in verbose mode; any falsy
            value (the default) means 15. It does not limit the returned
            list.

    Returns:
        When the global ``verbose`` flag is falsy, a list of
        ``[domain, rank]`` pairs sorted by ascending rank value. When
        ``verbose`` is truthy the table is printed instead and ``None``
        is returned.
    """
    c = create_connection()

    # Get some similar sites (same call order as the lookups always had).
    sites_by_similar = find_by_similarsites(c, starter_site=seed_domain)
    sites_by_html = find_by_html(c, starter_site=seed_domain)

    # Seeding `seen` with the seed domain both excludes it from the bucket
    # and avoids a rank lookup whose result would be thrown away.
    seen = {seed_domain}
    all_sites = []
    for repository in (sites_by_html, sites_by_similar):
        for site in repository:
            domain = site['url'] if 'url' in site else site['domain']
            if domain in seen:
                # Already handled (or it is the seed): no duplicate row and
                # no repeated get_rank call.
                continue
            seen.add(domain)
            rank = get_rank(c, domain)
            if rank:
                all_sites.append([domain, rank])

    # Ascending sort: the smallest rank value comes first.
    all_sites.sort(key=lambda pair: pair[1])

    if verbose:
        # Single-argument print(...) behaves the same as a Python 2 print
        # statement and as the Python 3 print function.
        print(tabulate(all_sites[:total_sites or 15]))
        return None
    return all_sites
def create_bucket_from_keywords(search_string, total_sites=False):
    """Create an ad bucket of ranked sites matching ``search_string``.

    The candidate list is every site returned by ``find_by_html`` for the
    text, followed by the first tenth of the ``find_by_similarsites``
    results for each site in the first tenth of those HTML matches. A
    domain is kept only when ``get_rank`` returns a truthy value for it,
    and each domain appears at most once in the result.

    Args:
        search_string: Text passed to ``find_by_html`` as ``starter_text``.
        total_sites: Number of rows to print in verbose mode; any falsy
            value (the default) means 15. It does not limit the returned
            list.

    Returns:
        When the global ``verbose`` flag is falsy, a list of
        ``[domain, rank]`` pairs sorted by ascending rank value. When
        ``verbose`` is truthy, progress messages and the table are printed
        instead and ``None`` is returned.
    """
    c = create_connection()

    # Get the sites matching the text.
    sites_by_html = find_by_html(c, starter_text=search_string)
    if verbose:
        print("Found {0} sites by meta desc".format(len(sites_by_html)))

    # Find similar sites to the top 10% of the HTML matches. Floor division
    # means a list with fewer than ten entries yields an empty slice.
    expanded_sites = []
    html_cutoff = len(sites_by_html) // 10
    for site in sites_by_html[:html_cutoff]:
        domain = site['url'] if 'url' in site else site['domain']
        sites_by_similar = find_by_similarsites(c, starter_site=domain)
        # Take the top 10% of each similar-sites result; a separate name
        # keeps the outer cut-off from being overwritten mid-loop.
        similar_cutoff = len(sites_by_similar) // 10
        if verbose:
            print("Adding {0} sites by similarsites".format(similar_cutoff))
        expanded_sites += sites_by_similar[:similar_cutoff]

    # Original HTML matches first, then the expanded similar sites.
    all_sites = sites_by_html + expanded_sites
    if verbose:
        # Candidate count before duplicates are dropped.
        print("Total sites: {0}".format(len(all_sites)))

    # Rank each distinct domain once; the same domain can show up in the
    # HTML matches and in several similar-sites results.
    seen = set()
    ranked_sites = []
    for site in all_sites:
        domain = site['url'] if 'url' in site else site['domain']
        if domain in seen:
            continue
        seen.add(domain)
        rank = get_rank(c, domain)
        if rank:
            ranked_sites.append([domain, rank])

    # Ascending sort: the smallest rank value comes first.
    ranked_sites.sort(key=lambda pair: pair[1])

    if verbose:
        # Single-argument print(...) behaves the same as a Python 2 print
        # statement and as the Python 3 print function.
        print(tabulate(ranked_sites[:total_sites or 15]))
        return None
    return ranked_sites