def change_domain_levels(src, dest, new_domain_level): print "Processing", src data = read_vm_file(src) clicks = {} for referrer, target, num_clicks in data: if ( referrer is None or target is None or num_clicks == -1 or domain_level(referrer) == 1 or domain_level(target) == 1 ): continue newr = change_domain_level(referrer, new_domain_level) newt = change_domain_level(target, new_domain_level) if newr not in clicks: clicks[newr] = {} if newt not in clicks[newr]: clicks[newr][newt] = 0 clicks[newr][newt] += num_clicks with open(dest, "w") as destf: writer = csv.writer(destf, delimiter="\t") for referrer in clicks: for target in clicks[referrer]: writer.writerow([referrer, target, clicks[referrer][target]])
def change_domain_levels(src, dest, new_domain_level): print "Processing", src data = read_vm_file(src) clicks = {} for referrer, target, num_clicks in data: if referrer is None or target is None or num_clicks == -1\ or domain_level(referrer) == 1 or domain_level(target) == 1: continue newr = change_domain_level(referrer, new_domain_level) newt = change_domain_level(target, new_domain_level) if newr not in clicks: clicks[newr] = {} if newt not in clicks[newr]: clicks[newr][newt] = 0 clicks[newr][newt] += num_clicks with open(dest, 'w') as destf: writer = csv.writer(destf, delimiter="\t") for referrer in clicks: for target in clicks[referrer]: writer.writerow([referrer, target, clicks[referrer][target]])
# Look up the two base directories once.  In every call below the first
# path is built under $TD and the other two paths under $TR.
_td_dir = os.getenv("TD")
_tr_dir = os.getenv("TR")

# u150-c100 news-only samples: no url_mod is supplied.
compute_entropy(
    os.path.join(_td_dir, "tweets", "sample-u150-c100-news-only.txt"),
    os.path.join(_tr_dir, "twitter", "twitter-avg-user-entropy-u150-c100-news-only.txt"),
    os.path.join(_tr_dir, "twitter", "twitter-collective-entropy-u150-c100-news-only.txt"),
)
compute_entropy(
    os.path.join(_td_dir, "aol", "sample-u150-c100-news-only.txt"),
    os.path.join(_tr_dir, "aol", "aol-avg-user-entropy-u150-c100-news-only.txt"),
    os.path.join(_tr_dir, "aol", "aol-collective-entropy-u150-c100-news-only.txt"),
)

# u1500-c1000 samples: each URL goes through change_domain_level(url, 2).
compute_entropy(
    os.path.join(_td_dir, "tweets", "sample-u1500-c1000.txt"),
    os.path.join(_tr_dir, "twitter", "twitter-avg-user-entropy-u1500-c1000.txt"),
    os.path.join(_tr_dir, "twitter", "twitter-collective-entropy-u1500-c1000.txt"),
    url_mod=lambda url: change_domain_level(url, 2),
)
compute_entropy(
    os.path.join(_td_dir, "aol", "sample-u1500-c1000.txt"),
    os.path.join(_tr_dir, "aol", "aol-avg-user-entropy-u1500-c1000.txt"),
    os.path.join(_tr_dir, "aol", "aol-collective-entropy-u1500-c1000.txt"),
    url_mod=lambda url: change_domain_level(url, 2),
)
print "Writing result." with open(collective_dest, 'w') as destf: destf.write(str(h)) if __name__ == "__main__": compute_entropy( os.path.join(os.getenv("TD"), "tweets", "sample-u150-c100-news-only.txt"), os.path.join(os.getenv("TR"), "twitter", "twitter-avg-user-entropy-u150-c100-news-only.txt"), os.path.join(os.getenv("TR"), "twitter", "twitter-collective-entropy-u150-c100-news-only.txt") ) compute_entropy( os.path.join(os.getenv("TD"), "aol", "sample-u150-c100-news-only.txt"), os.path.join(os.getenv("TR"), "aol", "aol-avg-user-entropy-u150-c100-news-only.txt"), os.path.join(os.getenv("TR"), "aol", "aol-collective-entropy-u150-c100-news-only.txt") ) compute_entropy( os.path.join(os.getenv("TD"), "tweets", "sample-u1500-c1000.txt"), os.path.join(os.getenv("TR"), "twitter", "twitter-avg-user-entropy-u1500-c1000.txt"), os.path.join(os.getenv("TR"), "twitter", "twitter-collective-entropy-u1500-c1000.txt"), url_mod = lambda url: change_domain_level(url, 2) ) compute_entropy( os.path.join(os.getenv("TD"), "aol", "sample-u1500-c1000.txt"), os.path.join(os.getenv("TR"), "aol", "aol-avg-user-entropy-u1500-c1000.txt"), os.path.join(os.getenv("TR"), "aol", "aol-collective-entropy-u1500-c1000.txt"), url_mod = lambda url: change_domain_level(url, 2) )