def change_domain_levels(src, dest, new_domain_level):
    """Re-aggregate click counts from ``src`` at another domain level.

    Rows come from ``read_vm_file(src)`` as ``(referrer, target, num_clicks)``
    triples. A row is skipped when its referrer or target is ``None``, when
    ``num_clicks`` is ``-1``, or when ``domain_level`` returns 1 for either
    URL. Every remaining referrer and target is mapped through
    ``change_domain_level(url, new_domain_level)``, and the clicks of all
    rows that map to the same (referrer, target) pair are summed.

    Args:
        src: Input handed to ``read_vm_file``.
        dest: Path of the output file (overwritten). One tab-separated
            ``referrer, target, clicks`` row is written per aggregated pair.
        new_domain_level: Level forwarded to ``change_domain_level``.
    """
    # Local import: the file's own import block is not visible from here.
    import sys

    # One pre-formatted argument prints the same line on Python 2 and 3
    # (the former ``print "Processing", src`` statement is Python 2 only).
    print("Processing %s" % (src,))

    data = read_vm_file(src)
    clicks = {}
    for referrer, target, num_clicks in data:
        # Cheap validity checks first, so domain_level() never sees None.
        if referrer is None or target is None or num_clicks == -1:
            continue
        if domain_level(referrer) == 1 or domain_level(target) == 1:
            continue
        newr = change_domain_level(referrer, new_domain_level)
        newt = change_domain_level(target, new_domain_level)
        per_target = clicks.setdefault(newr, {})
        per_target[newt] = per_target.get(newt, 0) + num_clicks

    # The csv module emits its own "\r\n" terminators, so the file object
    # must not translate newlines: binary mode on Python 2, newline="" on
    # Python 3. Plain "w" yields "\r\r\n" where newlines are translated.
    if sys.version_info[0] >= 3:
        destf = open(dest, "w", newline="")
    else:
        destf = open(dest, "wb")
    with destf:
        writer = csv.writer(destf, delimiter="\t")
        for referrer, targets in clicks.items():
            for target, total in targets.items():
                writer.writerow([referrer, target, total])
Esempio n. 2
0
def change_domain_levels(src, dest, new_domain_level):
	"""Sum click counts per (referrer, target) pair after changing domain level.

	``read_vm_file(src)`` supplies ``(referrer, target, num_clicks)`` triples.
	Triples with a ``None`` referrer or target, with ``num_clicks == -1``, or
	for which ``domain_level`` returns 1 on either URL are ignored. The other
	URLs are converted with ``change_domain_level(url, new_domain_level)``
	and clicks are accumulated per converted pair.

	Args:
		src: Input handed to ``read_vm_file``.
		dest: Output path (overwritten); receives tab-separated rows of
			referrer, target and summed clicks.
		new_domain_level: Level forwarded to ``change_domain_level``.
	"""
	# Local import: the file's own import block is not visible from here.
	import sys

	# A single formatted argument gives identical output on Python 2 and 3;
	# the original print statement does not parse on Python 3.
	print("Processing %s" % (src,))

	clicks = {}
	for referrer, target, num_clicks in read_vm_file(src):
		unusable = referrer is None or target is None or num_clicks == -1
		# domain_level() is only consulted for rows that passed the checks above.
		if unusable or domain_level(referrer) == 1 or domain_level(target) == 1:
			continue
		newr = change_domain_level(referrer, new_domain_level)
		newt = change_domain_level(target, new_domain_level)
		by_target = clicks.setdefault(newr, {})
		by_target[newt] = by_target.get(newt, 0) + num_clicks

	# csv writes "\r\n" itself, so newline translation must be off:
	# binary mode on Python 2, newline="" on Python 3.
	if sys.version_info[0] >= 3:
		destf = open(dest, 'w', newline="")
	else:
		destf = open(dest, 'wb')
	with destf:
		writer = csv.writer(destf, delimiter="\t")
		for referrer, by_target in clicks.items():
			for target, total in by_target.items():
				writer.writerow([referrer, target, total])
Esempio n. 3
0
    # Four compute_entropy runs, in order: twitter and aol on the
    # "u150-c100-news-only" samples, then twitter and aol on the
    # "u1500-c1000" samples. Each job lists three (environment variable,
    # sub-directory, file name) path specs followed by extra keyword arguments.
    # NOTE(review): the two $TR paths look like result files - confirm
    # against compute_entropy's signature.
    level_two = {"url_mod": lambda url: change_domain_level(url, 2)}
    entropy_jobs = (
        (
            ("TD", "tweets", "sample-u150-c100-news-only.txt"),
            ("TR", "twitter",
             "twitter-avg-user-entropy-u150-c100-news-only.txt"),
            ("TR", "twitter",
             "twitter-collective-entropy-u150-c100-news-only.txt"),
            {},
        ),
        (
            ("TD", "aol", "sample-u150-c100-news-only.txt"),
            ("TR", "aol", "aol-avg-user-entropy-u150-c100-news-only.txt"),
            ("TR", "aol", "aol-collective-entropy-u150-c100-news-only.txt"),
            {},
        ),
        (
            ("TD", "tweets", "sample-u1500-c1000.txt"),
            ("TR", "twitter", "twitter-avg-user-entropy-u1500-c1000.txt"),
            ("TR", "twitter", "twitter-collective-entropy-u1500-c1000.txt"),
            level_two,
        ),
        (
            ("TD", "aol", "sample-u1500-c1000.txt"),
            ("TR", "aol", "aol-avg-user-entropy-u1500-c1000.txt"),
            ("TR", "aol", "aol-collective-entropy-u1500-c1000.txt"),
            level_two,
        ),
    )
    for job in entropy_jobs:
        path_specs, options = job[:3], job[3]
        # Paths are resolved right before each call, as in the inline form.
        paths = [
            os.path.join(os.getenv(env_var), folder, file_name)
            for env_var, folder, file_name in path_specs
        ]
        compute_entropy(*paths, **options)
	print "Writing result."
	with open(collective_dest, 'w') as destf:
		destf.write(str(h))

if __name__ == "__main__":
	# Script entry point: run compute_entropy once per row of the table below.
	# Each row holds the sub-directory under $TD, the sub-directory under $TR,
	# the sample file name, the two file names passed as 2nd and 3rd argument,
	# and any extra keyword arguments.
	# The u1500-c1000 runs additionally pass every URL through
	# change_domain_level(url, 2).
	second_level = {"url_mod": lambda url: change_domain_level(url, 2)}
	runs = [
		(
			"tweets",
			"twitter",
			"sample-u150-c100-news-only.txt",
			"twitter-avg-user-entropy-u150-c100-news-only.txt",
			"twitter-collective-entropy-u150-c100-news-only.txt",
			{},
		),
		(
			"aol",
			"aol",
			"sample-u150-c100-news-only.txt",
			"aol-avg-user-entropy-u150-c100-news-only.txt",
			"aol-collective-entropy-u150-c100-news-only.txt",
			{},
		),
		(
			"tweets",
			"twitter",
			"sample-u1500-c1000.txt",
			"twitter-avg-user-entropy-u1500-c1000.txt",
			"twitter-collective-entropy-u1500-c1000.txt",
			second_level,
		),
		(
			"aol",
			"aol",
			"sample-u1500-c1000.txt",
			"aol-avg-user-entropy-u1500-c1000.txt",
			"aol-collective-entropy-u1500-c1000.txt",
			second_level,
		),
	]
	for data_dir, result_dir, sample, avg_name, collective_name, extra in runs:
		# Environment variables are read per run, in the original argument order.
		sample_path = os.path.join(os.getenv("TD"), data_dir, sample)
		avg_path = os.path.join(os.getenv("TR"), result_dir, avg_name)
		collective_path = os.path.join(os.getenv("TR"), result_dir, collective_name)
		compute_entropy(sample_path, avg_path, collective_path, **extra)