def calculate_fq_subpref_from_usr_file():
    """Compute and save the average per-user calling frequency of each subpref.

    Reads a tab-separated file with one ``user id, frequency`` pair per
    line, adds every user's frequency to the subpref that
    ``rd.read_in_user_home_subprefs()`` maps that user to, and divides each
    total by the user count from ``rd.read_in_subpref_num_users()``.
    Subpref ids 0..255 with a positive user count are written to the output
    file as ``subpref id, average`` rows.

    Returns:
        The ``defaultdict(float)`` keyed by subpref id.  Entries whose user
        count is not positive are left as the raw, undivided sum.
    """
    usr_home = rd.read_in_user_home_subprefs()
    subpref_num_users = rd.read_in_subpref_num_users()

    subpref_fq = defaultdict(float)

    file_name = "/home/sscepano/D4D res/ORGANIZED/SET3/Distr of Num and Fq of Calls/new results -- check the same/Users_and_their_calling_fq.tsv"
    # The context manager closes the input even when a row fails to parse.
    with open(file_name, 'r') as f:
        for line in f:
            usr, fq = line.split('\t')
            # float() tolerates the trailing newline left on the last field.
            subpref_fq[usr_home[int(usr)]] += float(fq)

    # Turn the per-subpref totals into per-user averages.
    for subpref in range(256):
        if subpref_num_users[subpref] > 0:
            subpref_fq[subpref] = subpref_fq[subpref] / float(subpref_num_users[subpref])

    file_name2 = "/home/sscepano/D4D res/ORGANIZED/SET3/Distr of Num and Fq of Calls/subpref/Subpref_avg_calling_fq_check.tsv"
    # Closing on exit guarantees the rows are flushed to disk before returning.
    with open(file_name2, 'w') as f2:
        for subpref in range(256):
            if subpref_num_users[subpref] > 0:
                f2.write(str(subpref) + '\t' + str(subpref_fq[subpref]) + '\n')

    return subpref_fq

#calculate_fq_subpref_from_usr_file()
def calculate_traj_subpref_from_usr_file():
    """Compute and save the average per-user trajectory value of each subpref.

    Reads a tab-separated file with one ``user id, value`` pair per line
    (the input file is named ``usr_traj_length.tsv``), adds every user's
    value to the subpref that ``rd.read_in_user_home_subprefs()`` maps that
    user to, and divides each total by the user count from
    ``rd.read_in_subpref_num_users()``.  Subpref ids 0..255 with a positive
    user count are written to the output file as ``subpref id, average``
    rows.

    Returns:
        The ``defaultdict(float)`` keyed by subpref id.  Entries whose user
        count is not positive are left as the raw, undivided sum.
    """
    usr_home = rd.read_in_user_home_subprefs()
    subpref_num_users = rd.read_in_subpref_num_users()

    subpref_traj = defaultdict(float)

    file_name = "/home/sscepano/D4D res/ORGANIZED/SET3/Clustering/usr res/usr_traj_length.tsv"
    # The context manager closes the input even when a row fails to parse.
    with open(file_name, 'r') as f:
        for line in f:
            usr, traj = line.split('\t')
            # float() tolerates the trailing newline left on the last field.
            subpref_traj[usr_home[int(usr)]] += float(traj)

    # Turn the per-subpref totals into per-user averages.
    for subpref in range(256):
        if subpref_num_users[subpref] > 0:
            subpref_traj[subpref] = subpref_traj[subpref] / float(subpref_num_users[subpref])

    file_name2 = "/home/sscepano/D4D res/allstuff/CLUSTERING/subpref res/subpref_avg_traj.tsv"
    # Closing on exit guarantees the rows are flushed to disk before returning.
    with open(file_name2, 'w') as f2:
        for subpref in range(256):
            if subpref_num_users[subpref] > 0:
                f2.write(str(subpref) + '\t' + str(subpref_traj[subpref]) + '\n')

    return subpref_traj
def data_to_files(data):
    """Write per-user and per-subpref sums of ``data`` to three TSV files.

    For every user id in 0..500000, ``data[usr].sum()`` is added to the
    total of the subpref that ``rd.read_in_user_home_subprefs()`` maps the
    user to.  Each subpref total is also divided by the user count from
    ``rd.read_in_subpref_num_users()`` to get a per-user average.

    Files written (tab-separated, one row per id):
        * file1: ``user id, sum of data[usr]`` for every user.
        * file2: ``subpref id, total`` for subprefs with a non-zero total.
        * file3: ``subpref id, total / user count`` for subprefs with a
          non-zero average.

    The three paths are printed once the files have been written.

    Args:
        data: indexable by user id; every ``data[usr]`` must provide
            ``.sum()`` (presumably a NumPy array -- verify against caller).

    Returns:
        None.
    """
    usr_home = rd.read_in_user_home_subprefs()
    num_usrs = rd.read_in_subpref_num_users()

    num_visited_subprefs_per_subpref = defaultdict(int)
    num_visited_subprefs_per_subpref_scaled = defaultdict(int)

    for usr in range(500001):
        num_visited_subprefs_per_subpref[usr_home[usr]] += data[usr].sum()

    for subpref in range(256):
        if num_usrs[subpref] != 0:
            # Divide by a float: with integer operands Python 2's "/" would
            # floor the average and drop every subpref averaging below 1.
            num_visited_subprefs_per_subpref_scaled[subpref] = (
                num_visited_subprefs_per_subpref[subpref] / float(num_usrs[subpref]))

    file1 = "/home/sscepano/D4D res/allstuff/distr of num of visited subprefs/1/usr_num_visited_subprefs.tsv"
    file2 = "/home/sscepano/D4D res/allstuff/distr of num of visited subprefs/1/subpref_summed_num_visited_subprefs.tsv"
    file3 = "/home/sscepano/D4D res/allstuff/distr of num of visited subprefs/1/subpref_scaled_num_visited_subprefs.tsv"

    # Each file is closed (and therefore flushed) as soon as it is complete.
    with open(file1, "w") as f1:
        for usr in range(500001):
            f1.write(str(usr) + '\t' + str(n.sum(data[usr])) + '\n')

    with open(file2, "w") as f2:
        for subpref in range(256):
            if num_visited_subprefs_per_subpref[subpref] != 0:
                f2.write(str(subpref) + '\t' + str(num_visited_subprefs_per_subpref[subpref]) + '\n')

    with open(file3, "w") as f3:
        for subpref in range(256):
            if num_visited_subprefs_per_subpref_scaled[subpref] != 0:
                f3.write(str(subpref) + '\t' + str(num_visited_subprefs_per_subpref_scaled[subpref]) + '\n')

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(file1)
    print(file2)
    print(file3)
def save_data_to_matrix(home_calls, last_usr_loc_n_dist):
    """Aggregate per-user values by subpref and save them as one TSV table.

    For every user id in 0..500000 the values ``home_calls[usr][0]``,
    ``home_calls[usr][1]`` and ``last_usr_loc_n_dist[usr][1]`` are added to
    the totals of the subpref that ``rd.read_in_user_home_subprefs()`` maps
    the user to.  The number of distinct keys in each aggregate, and the
    length of the table from ``rd.read_in_subpref_avg_fq()``, are printed.

    One tab-separated row is then written for every subpref id 0..255:
        subpref id, sum of ``home_calls[..][0]``, sum of
        ``home_calls[..][1]``, first sum divided by the sum of both (0 when
        both are zero), ``last_usr_loc_n_dist[..][1]`` total divided by the
        subpref's user count, and the subpref's entry from
        ``rd.read_in_subpref_avg_fq()``.

    Args:
        home_calls: indexable by user id; each item is indexed with 0 and 1
            (presumably calls made inside / outside the home subpref --
            verify against caller).
        last_usr_loc_n_dist: indexable by user id; item ``[1]`` is summed
            (presumably a travelled distance -- verify against caller).

    Returns:
        None.
    """
    usr_home = rd.read_in_user_home_subprefs()
    subpref_avg_fq = rd.read_in_subpref_avg_fq()
    subpref_num_usr = rd.read_in_subpref_num_users()

    subpref_calls = defaultdict(int)
    subpref_outside_calls = defaultdict(int)
    subpref_pct_inside_calls = defaultdict(int)
    subpref_total_traj = defaultdict(int)

    for usr in range(500001):
        home = usr_home[usr]
        subpref_calls[home] += home_calls[usr][0]
        subpref_outside_calls[home] += home_calls[usr][1]
        subpref_total_traj[home] += last_usr_loc_n_dist[usr][1]

    # Size report; single-argument print(...) is the same on Python 2 and 3.
    print(len(subpref_outside_calls))
    print(len(subpref_calls))
    print(len(subpref_total_traj))
    print(len(subpref_avg_fq))

    file_o = "/home/sscepano/D4D res/allstuff/CLUSTERING/ALL_SUBPREF_clustering_args.tsv"
    # The context manager closes (and flushes) the table even on error.
    with open(file_o, "w") as f_o:
        for subpref_id in range(256):
            suma = float(subpref_calls[subpref_id] + subpref_outside_calls[subpref_id])
            if suma != 0:
                subpref_pct_inside_calls[subpref_id] = subpref_calls[subpref_id] / suma

            # Guard the average: a subpref with no users would otherwise
            # raise ZeroDivisionError.  Its total is left undivided instead.
            if subpref_num_usr[subpref_id] != 0:
                subpref_total_traj[subpref_id] = (
                    subpref_total_traj[subpref_id] / float(subpref_num_usr[subpref_id]))

            f_o.write(str(subpref_id) + '\t'
                      + str(subpref_calls[subpref_id]) + '\t'
                      + str(subpref_outside_calls[subpref_id]) + '\t'
                      + str(subpref_pct_inside_calls[subpref_id]) + '\t'
                      + str(subpref_total_traj[subpref_id]) + '\t'
                      + str(subpref_avg_fq[subpref_id]) + '\n')