Esempio n. 1
0
def create_bucket_from_domain(seed_domain, total_sites=False):
    """Create an ad bucket of ranked sites related to a seed domain.

    Candidates are collected from find_by_similarsites() and find_by_html(),
    both queried with ``seed_domain``. Every distinct candidate domain is
    looked up once with get_rank(); the seed domain itself and candidates
    whose rank is falsy are left out of the bucket.

    Args:
        seed_domain: Domain used as the starting point for both lookups.
        total_sites: Number of rows to print in verbose mode. Any falsy
            value (the default) prints the first 15 rows.

    Returns:
        When the module-level ``verbose`` flag is falsy, a list of
        ``[domain, rank]`` pairs sorted by ascending rank value.
        When ``verbose`` is truthy the table is printed instead and the
        function returns None.
    """
    c = create_connection()

    # Get candidate sites from both sources.
    sites_by_similar = find_by_similarsites(c, starter_site=seed_domain)
    sites_by_html = find_by_html(c, starter_site=seed_domain)

    # Merge both sources. Seeding `seen` with the seed domain keeps it out
    # of the bucket and also prevents the same domain from being ranked
    # (and listed) twice when both sources return it.
    all_sites = []
    seen = set([seed_domain])
    for repository in [sites_by_html, sites_by_similar]:
        for site in repository:
            # Entries carry the domain under either 'url' or 'domain'.
            domain = site['url'] if 'url' in site else site['domain']
            if domain in seen:
                continue
            seen.add(domain)
            rank = get_rank(c, domain)
            if rank:
                all_sites.append([domain, rank])

    # Ascending sort on the rank value (smallest rank number first).
    all_sites.sort(key=lambda entry: entry[1])

    if verbose:
        # Single-argument call form prints the same on Python 2 and 3.
        print(tabulate(all_sites[:total_sites or 15]))
    else:
        return all_sites
Esempio n. 2
0
def create_bucket_from_domain(seed_domain, total_sites=False):
    """Create an ad bucket of ranked sites related to a seed domain.

    Candidates are collected from find_by_similarsites() and find_by_html(),
    both queried with ``seed_domain``. Every distinct candidate domain is
    looked up once with get_rank(); the seed domain itself and candidates
    whose rank is falsy are left out of the bucket.

    Args:
        seed_domain: Domain used as the starting point for both lookups.
        total_sites: Number of rows to print in verbose mode. Any falsy
            value (the default) prints the first 15 rows.

    Returns:
        When the module-level ``verbose`` flag is falsy, a list of
        ``[domain, rank]`` pairs sorted by ascending rank value.
        When ``verbose`` is truthy the table is printed instead and the
        function returns None.
    """
    c = create_connection()

    # Get candidate sites from both sources.
    sites_by_similar = find_by_similarsites(c, starter_site=seed_domain)
    sites_by_html = find_by_html(c, starter_site=seed_domain)

    # Merge both sources. Seeding `seen` with the seed domain keeps it out
    # of the bucket and also prevents the same domain from being ranked
    # (and listed) twice when both sources return it.
    all_sites = []
    seen = set([seed_domain])
    for repository in [sites_by_html, sites_by_similar]:
        for site in repository:
            # Entries carry the domain under either 'url' or 'domain'.
            domain = site['url'] if 'url' in site else site['domain']
            if domain in seen:
                continue
            seen.add(domain)
            rank = get_rank(c, domain)
            if rank:
                all_sites.append([domain, rank])

    # Ascending sort on the rank value (smallest rank number first).
    all_sites.sort(key=lambda entry: entry[1])

    if verbose:
        # Single-argument call form prints the same on Python 2 and 3.
        print(tabulate(all_sites[:total_sites or 15]))
    else:
        return all_sites
Esempio n. 3
0
def create_bucket_from_keywords(search_string, total_sites=False):
    """Create an ad bucket of ranked sites starting from some input text.

    The text is passed to find_by_html(). The first tenth of those matches
    is then expanded: each one is passed to find_by_similarsites() and the
    first tenth of that result is added to the candidates. With fewer than
    10 text matches the integer division yields 0, so no expansion happens.
    Every distinct candidate domain is looked up once with get_rank();
    candidates whose rank is falsy are left out of the bucket.

    Args:
        search_string: Text handed to find_by_html() as ``starter_text``.
        total_sites: Number of rows to print in verbose mode. Any falsy
            value (the default) prints the first 15 rows.

    Returns:
        When the module-level ``verbose`` flag is falsy, a list of
        ``[domain, rank]`` pairs sorted by ascending rank value.
        When ``verbose`` is truthy, progress messages and the table are
        printed instead and the function returns None.
    """
    c = create_connection()

    # Get the sites matching the input text.
    sites_by_html = find_by_html(c, starter_text=search_string)
    if verbose:
        print("Found {0} sites by meta desc".format(len(sites_by_html)))

    # Expand the first 10% of the text matches with similar sites.
    # Separate names for the two 10% cut-offs: the original reused one
    # variable for both, shadowing the outer slice bound inside the loop.
    all_sites = []
    seed_count = len(sites_by_html) // 10
    for site in sites_by_html[:seed_count]:
        domain = site['url'] if 'url' in site else site['domain']
        sites_by_similar = find_by_similarsites(c, starter_site=domain)
        similar_count = len(sites_by_similar) // 10  # take first 10%
        if verbose:
            print("Adding {0} sites by similarsites".format(similar_count))
        all_sites += sites_by_similar[:similar_count]

    # Prepend the original text matches to the expanded candidates.
    all_sites = sites_by_html + all_sites
    if verbose:
        print("Total sites: {0}".format(len(all_sites)))

    # Rank each distinct domain once; the same domain can show up both as
    # a text match and as a similar site, or under several seeds.
    ranked_sites = []
    seen = set()
    for site in all_sites:
        domain = site['url'] if 'url' in site else site['domain']
        if domain in seen:
            continue
        seen.add(domain)
        rank = get_rank(c, domain)
        if rank:
            ranked_sites.append([domain, rank])

    # Ascending sort on the rank value (smallest rank number first).
    ranked_sites.sort(key=lambda entry: entry[1])

    if verbose:
        # Single-argument call form prints the same on Python 2 and 3.
        print(tabulate(ranked_sites[:total_sites or 15]))
    else:
        return ranked_sites
Esempio n. 4
0
def create_bucket_from_keywords(search_string, total_sites=False):
    """Create an ad bucket of ranked sites starting from some input text.

    The text is passed to find_by_html(). The first tenth of those matches
    is then expanded: each one is passed to find_by_similarsites() and the
    first tenth of that result is added to the candidates. With fewer than
    10 text matches the integer division yields 0, so no expansion happens.
    Every distinct candidate domain is looked up once with get_rank();
    candidates whose rank is falsy are left out of the bucket.

    Args:
        search_string: Text handed to find_by_html() as ``starter_text``.
        total_sites: Number of rows to print in verbose mode. Any falsy
            value (the default) prints the first 15 rows.

    Returns:
        When the module-level ``verbose`` flag is falsy, a list of
        ``[domain, rank]`` pairs sorted by ascending rank value.
        When ``verbose`` is truthy, progress messages and the table are
        printed instead and the function returns None.
    """
    c = create_connection()

    # Get the sites matching the input text.
    sites_by_html = find_by_html(c, starter_text=search_string)
    if verbose:
        print("Found {0} sites by meta desc".format(len(sites_by_html)))

    # Expand the first 10% of the text matches with similar sites.
    # Separate names for the two 10% cut-offs: the original reused one
    # variable for both, shadowing the outer slice bound inside the loop.
    all_sites = []
    seed_count = len(sites_by_html) // 10
    for site in sites_by_html[:seed_count]:
        domain = site['url'] if 'url' in site else site['domain']
        sites_by_similar = find_by_similarsites(c, starter_site=domain)
        similar_count = len(sites_by_similar) // 10  # take first 10%
        if verbose:
            print("Adding {0} sites by similarsites".format(similar_count))
        all_sites += sites_by_similar[:similar_count]

    # Prepend the original text matches to the expanded candidates.
    all_sites = sites_by_html + all_sites
    if verbose:
        print("Total sites: {0}".format(len(all_sites)))

    # Rank each distinct domain once; the same domain can show up both as
    # a text match and as a similar site, or under several seeds.
    ranked_sites = []
    seen = set()
    for site in all_sites:
        domain = site['url'] if 'url' in site else site['domain']
        if domain in seen:
            continue
        seen.add(domain)
        rank = get_rank(c, domain)
        if rank:
            ranked_sites.append([domain, rank])

    # Ascending sort on the rank value (smallest rank number first).
    ranked_sites.sort(key=lambda entry: entry[1])

    if verbose:
        # Single-argument call form prints the same on Python 2 and 3.
        print(tabulate(ranked_sites[:total_sites or 15]))
    else:
        return ranked_sites