def filter_targets(src, dest):
    """Write the records of ``src`` whose host is not skipped to ``dest``.

    Records are loaded with ``read_aol_data(src)``; each record must unpack
    into exactly two fields, which are written under the header
    ``user``/``url``. A record is dropped when ``should_skip_host`` returns a
    truthy value for its second field (the skip criteria live in that helper,
    which is defined elsewhere).

    Args:
        src: Input location, passed straight to ``read_aol_data``.
        dest: Path of the tab-delimited output file; it is overwritten.
    """
    # The parenthesised single-argument form prints identically under
    # Python 2 (print statement) and Python 3 (print function).
    print("Processing %s" % src)
    data = read_aol_data(src)
    with open(dest, 'w') as destf:
        writer = csv.writer(destf, delimiter="\t")
        writer.writerow(['user', 'url'])
        # The loop variable is named ``url`` rather than ``dest`` so the
        # ``dest`` parameter (the output path) is never rebound.
        for user, url in data:
            if not should_skip_host(url):
                writer.writerow([user, url])
def compute_user_volume(src, dest):
    """Count how many records each user has in ``src`` and write the totals.

    Records are loaded with ``read_aol_data(src)``. The first field of each
    record is converted with ``int()`` and used as the user key. The output
    is a tab-delimited file with one ``user<TAB>count`` row per distinct
    user and no header row.

    Args:
        src: Input location, passed straight to ``read_aol_data``.
        dest: Path of the tab-delimited output file; it is overwritten.

    Raises:
        ValueError: If a record's first field cannot be converted to ``int``.
    """
    # The parenthesised single-argument form prints identically under
    # Python 2 (print statement) and Python 3 (print function).
    print("Reading visits from %s" % (src,))
    visits = read_aol_data(src)
    print("Computing volume.")
    volumes = {}
    for visit in visits:
        user = int(visit[0])
        volumes[user] = volumes.get(user, 0) + 1
    with open(dest, 'w') as destf:
        writer = csv.writer(destf, delimiter="\t")
        for user, clicks in volumes.items():
            writer.writerow([user, clicks])