def calculate_traj_subpref_from_usr_file():
    """Average the per-user values of usr_traj_length.tsv per home subprefecture.

    Each input line is ``user_id<TAB>value``.  Values are summed under the
    user's home subprefecture (``rd.read_in_user_home_subprefs()``) and, for
    subprefecture ids 0..255 whose user count
    (``rd.read_in_subpref_num_users()``) is positive, divided by that count.
    The averages are written as ``subpref<TAB>average`` lines to
    subpref_avg_traj.tsv.

    Returns:
        defaultdict(float) keyed by subprefecture id.  Entries whose user
        count is not positive keep the raw (un-averaged) sum.
    """
    usr_home = rd.read_in_user_home_subprefs()
    subpref_num_users = rd.read_in_subpref_num_users()

    subpref_rg = defaultdict(float)

    file_name = "/home/sscepano/D4D res/ORGANIZED/SET3/Clustering/usr res/usr_traj_length.tsv"
    # Context manager guarantees the handle is released even if a line is malformed.
    with open(file_name, 'r') as f:
        for line in f:
            if not line.strip():
                # Tolerate blank (e.g. trailing) lines instead of failing the unpack.
                continue
            usr, rg = line.split('\t')
            subpref_rg[usr_home[int(usr)]] += float(rg)

    for subpref in range(256):
        if subpref_num_users[subpref] > 0:
            subpref_rg[subpref] = subpref_rg[subpref] / float(subpref_num_users[subpref])

    file_name2 = "/home/sscepano/D4D res/allstuff/CLUSTERING/subpref res/subpref_avg_traj.tsv"
    with open(file_name2, 'w') as f2:
        for subpref in range(256):
            if subpref_num_users[subpref] > 0:
                f2.write(str(subpref) + '\t' + str(subpref_rg[subpref]) + '\n')

    return subpref_rg
def calculate_fq_subpref_from_usr_file():
    """Average the per-user calling frequency per home subprefecture.

    Each line of Users_and_their_calling_fq.tsv is ``user_id<TAB>value``.
    Values are summed under the user's home subprefecture
    (``rd.read_in_user_home_subprefs()``) and, for subprefecture ids 0..255
    whose user count (``rd.read_in_subpref_num_users()``) is positive,
    divided by that count.  The averages are written as
    ``subpref<TAB>average`` lines to Subpref_avg_calling_fq_check.tsv.

    Returns:
        defaultdict(float) keyed by subprefecture id.  Entries whose user
        count is not positive keep the raw (un-averaged) sum.
    """
    usr_home = rd.read_in_user_home_subprefs()
    subpref_num_users = rd.read_in_subpref_num_users()

    subpref_fq = defaultdict(float)

    file_name = "/home/sscepano/D4D res/ORGANIZED/SET3/Distr of Num and Fq of Calls/new results -- check the same/Users_and_their_calling_fq.tsv"
    # Context manager guarantees the handle is released even if a line is malformed.
    with open(file_name, 'r') as f:
        for line in f:
            if not line.strip():
                # Tolerate blank (e.g. trailing) lines instead of failing the unpack.
                continue
            usr, fq = line.split('\t')
            subpref_fq[usr_home[int(usr)]] += float(fq)

    for subpref in range(256):
        if subpref_num_users[subpref] > 0:
            subpref_fq[subpref] = subpref_fq[subpref] / float(subpref_num_users[subpref])

    file_name2 = "/home/sscepano/D4D res/ORGANIZED/SET3/Distr of Num and Fq of Calls/subpref/Subpref_avg_calling_fq_check.tsv"
    with open(file_name2, 'w') as f2:
        for subpref in range(256):
            if subpref_num_users[subpref] > 0:
                f2.write(str(subpref) + '\t' + str(subpref_fq[subpref]) + '\n')

    return subpref_fq

#calculate_fq_subpref_from_usr_file()
def calculate_diversity(data):
    """Compute a travel-diversity index per user and its average per subprefecture.

    For every user id in 0..500000, ``data[usr]`` is read as a mapping from
    place (subprefecture) id to a visit count.  With ``S`` the sum of all
    counts and ``v = count / S``, the index is::

        -sum(v * log(v)) / log(S)

    (formula from "Finger on the Pulse", per the original comment).  The key
    ``-1`` is left out of the sum, but its count still contributes to ``S``.

    The per-user values are then averaged over the users of each
    subprefecture 1..255 that has a positive user count, and written as
    ``subpref<TAB>average`` lines to subpref_diversity_index.tsv.

    Args:
        data: indexable by user id; each item exposes ``values()``, iteration
            over keys and item lookup (dict-like).

    Returns:
        Tuple ``(div, div_subpref)`` of defaultdicts: per-user index and
        per-subprefecture average.
    """
    ##################################################
    ### this part calculates user diversity
    ##################################################
    div = defaultdict(int)

    for usr in range(500001):
        visits = data[usr]
        # total places visits
        S = float(sum(visits.values()))
        if S > 0:
            for subpref in visits:
                if subpref != -1:
                    # frequency of visits to this place
                    v = visits[subpref] / S
                    # A zero count contributes nothing (0 * log 0 is taken as 0);
                    # math.log(0) would raise ValueError.
                    if v > 0:
                        div[usr] -= v * math.log(v)
            norm = math.log(S)
            if norm != 0:
                div[usr] = div[usr] / norm
            else:
                # S == 1: a single visit has no diversity, and log(1) == 0
                # would otherwise cause a division by zero.
                div[usr] = 0.0

    ################################################################
    ### subpref diversity
    ################################################################
    num_subpref_usrs = rd.read_in_subpref_num_users()
    div_subpref = defaultdict(int)

    for subpref in range(1, 256):
        # only for places where we have users
        if num_subpref_usrs[subpref] > 0:
            # membership flags of the users of this subprefecture
            subpref_usrs = rd.read_in_subpref_users(subpref)
            # scan every user and keep the members (not the fastest way but still fine)
            for usr in range(500001):
                if subpref_usrs[usr] == 1:
                    # sum all member indexes, averaged right after the scan
                    div_subpref[subpref] += div[usr]
            div_subpref[subpref] = div_subpref[subpref] / float(num_subpref_usrs[subpref])

    file_name2 = "/home/sscepano/D4D res/allstuff/diversity of travel/subpref_diversity_index.tsv"
    # Context manager closes the file even if a write fails.
    with open(file_name2, "w") as f2:
        for subpref in range(1, 256):
            if num_subpref_usrs[subpref] > 0:
                f2.write(str(subpref) + '\t' + str(div_subpref[subpref]) + '\n')

    print(file_name2)

    return div, div_subpref
def save_subpref_visited_pl():
    """Write the per-subprefecture value from ``rd.read_in_subpref_num_places()``.

    For subprefecture ids 0..255 whose user count
    (``rd.read_in_subpref_num_users()``) is positive, one
    ``subpref<TAB>value`` line is written to subpref_avg_num_visited_pl.tsv.

    Returns:
        The object returned by ``rd.read_in_subpref_num_places()``, unchanged.
    """
    subpref_num_users = rd.read_in_subpref_num_users()
    subpref_pl = rd.read_in_subpref_num_places()

    file_name2 = "/home/sscepano/D4D res/allstuff/CLUSTERING/subpref res/subpref_avg_num_visited_pl.tsv"
    # Context manager flushes and closes the output deterministically.
    with open(file_name2, 'w') as f2:
        for subpref in range(256):
            if subpref_num_users[subpref] > 0:
                f2.write(str(subpref) + '\t' + str(subpref_pl[subpref]) + '\n')

    return subpref_pl
def recalculate_num_visits_outside():
    """Scale the outside-visit totals by the number of users per subprefecture.

    For subprefecture ids 0..255 with a non-zero user count
    (``rd.read_in_subpref_num_users()``), the total from
    ``rd.read_in_BACK_num_of_visits_outisde_only()`` is divided by that
    count, printed, and written as a ``subpref<TAB>scaled`` line to
    num_outside_visits_scaled.tsv.

    Returns:
        None.
    """
    total_visits = rd.read_in_BACK_num_of_visits_outisde_only()
    num_usrs = rd.read_in_subpref_num_users()
    scaled_visits = defaultdict(float)

    file_out = "/home/sscepano/D4D res/allstuff/CLUSTERING/num_outside_visits_scaled.tsv"
    # Context manager flushes and closes the output deterministically.
    with open(file_out, "w") as f:
        for subpref in range(256):
            # NOTE(review): the print and the write are assumed to sit under
            # this guard, as in the sibling functions -- confirm against the
            # original layout.
            if num_usrs[subpref] != 0:
                # float() keeps the fractional part when both operands are
                # integers (Python 2 would otherwise floor the average).
                scaled_visits[subpref] = total_visits[subpref] / float(num_usrs[subpref])
                print(scaled_visits[subpref])
                f.write(str(subpref) + '\t' + str(scaled_visits[subpref]) + '\n')

    return
def data_to_files(data):
    """Dump per-user and per-subprefecture sums of ``data`` to three TSV files.

    For every user id in 0..500000, ``data[usr].sum()`` is added to the
    total of the user's home subprefecture
    (``rd.read_in_user_home_subprefs()``).  Each total is then divided by
    the subprefecture's user count (``rd.read_in_subpref_num_users()``)
    when that count is non-zero.

    Files written:
        usr_num_visited_subprefs.tsv            -- ``usr<TAB>n.sum(data[usr])`` for every user
        subpref_summed_num_visited_subprefs.tsv -- non-zero per-subprefecture totals
        subpref_scaled_num_visited_subprefs.tsv -- non-zero per-subprefecture averages

    Args:
        data: indexable by user id; each item must provide ``.sum()`` and be
            accepted by ``n.sum`` (presumably a NumPy array -- verify against
            the caller).

    Returns:
        None.
    """
    summed = defaultdict(int)
    scaled = defaultdict(int)

    usr_home = rd.read_in_user_home_subprefs()
    num_usrs = rd.read_in_subpref_num_users()

    for usr in range(500001):
        summed[usr_home[usr]] += data[usr].sum()

    for subpref in range(256):
        if num_usrs[subpref] != 0:
            # float() keeps the fractional part when both operands are
            # integers (Python 2 would otherwise floor the average).
            scaled[subpref] = summed[subpref] / float(num_usrs[subpref])

    file1 = "/home/sscepano/D4D res/allstuff/distr of num of visited subprefs/1/usr_num_visited_subprefs.tsv"
    file2 = "/home/sscepano/D4D res/allstuff/distr of num of visited subprefs/1/subpref_summed_num_visited_subprefs.tsv"
    file3 = "/home/sscepano/D4D res/allstuff/distr of num of visited subprefs/1/subpref_scaled_num_visited_subprefs.tsv"

    # All three outputs are opened up front, as before, but are now closed
    # (and flushed) when the block exits, even on error.
    with open(file1, "w") as f1, open(file2, "w") as f2, open(file3, "w") as f3:
        for usr in range(500001):
            f1.write(str(usr) + '\t' + str(n.sum(data[usr])) + '\n')

        for subpref in range(256):
            if summed[subpref] != 0:
                f2.write(str(subpref) + '\t' + str(summed[subpref]) + '\n')

        for subpref in range(256):
            if scaled[subpref] != 0:
                f3.write(str(subpref) + '\t' + str(scaled[subpref]) + '\n')

    print(file1)
    print(file2)
    print(file3)

    return
def save_data_to_matrix(home_calls, last_usr_loc_n_dist):
    """Aggregate per-user call and trajectory figures per home subprefecture.

    For every user id in 0..500000 the following are summed under the user's
    home subprefecture (``rd.read_in_user_home_subprefs()``):
    ``home_calls[usr][0]``, ``home_calls[usr][1]`` and
    ``last_usr_loc_n_dist[usr][1]``.

    For each subprefecture id 0..255 whose two call sums do not add up to
    zero, one tab-separated line is written to
    ALL_SUBPREF_clustering_args.tsv with the columns: subprefecture id,
    first call sum, second ("outside") call sum, first sum as a fraction of
    both, trajectory sum divided by the subprefecture's user count, and the
    value from ``rd.read_in_subpref_avg_fq()``.

    Args:
        home_calls: indexable by user id; items are indexable at 0 and 1.
        last_usr_loc_n_dist: indexable by user id; items are indexable at 1.

    Returns:
        None.
    """
    usr_home = rd.read_in_user_home_subprefs()
    subpref_avg_fq = rd.read_in_subpref_avg_fq()
    subpref_num_usr = rd.read_in_subpref_num_users()

    subpref_calls = defaultdict(int)
    subpref_outside_calls = defaultdict(int)
    subpref_pct_inside_calls = defaultdict(int)
    subpref_total_traj = defaultdict(int)

    for usr in range(500001):
        home = usr_home[usr]
        subpref_calls[home] += home_calls[usr][0]
        subpref_outside_calls[home] += home_calls[usr][1]
        subpref_total_traj[home] += last_usr_loc_n_dist[usr][1]

    # Diagnostic output: number of distinct keys in each aggregate.
    print(len(subpref_outside_calls))
    print(len(subpref_calls))
    print(len(subpref_total_traj))
    print(len(subpref_avg_fq))

    file_o = "/home/sscepano/D4D res/allstuff/CLUSTERING/ALL_SUBPREF_clustering_args.tsv"
    # Context manager flushes and closes the output deterministically.
    with open(file_o, "w") as f_o:
        for subpref_id in range(256):
            suma = float(subpref_calls[subpref_id] + subpref_outside_calls[subpref_id])
            # NOTE(review): the write is assumed to sit under this guard
            # together with the two divisions -- confirm against the
            # original layout.
            if suma != 0:
                subpref_pct_inside_calls[subpref_id] = subpref_calls[subpref_id] / suma
                # float() keeps the fractional part if both operands are integers.
                subpref_total_traj[subpref_id] = (
                    subpref_total_traj[subpref_id] / float(subpref_num_usr[subpref_id])
                )
                f_o.write(
                    str(subpref_id) + '\t'
                    + str(subpref_calls[subpref_id]) + '\t'
                    + str(subpref_outside_calls[subpref_id]) + '\t'
                    + str(subpref_pct_inside_calls[subpref_id]) + '\t'
                    + str(subpref_total_traj[subpref_id]) + '\t'
                    + str(subpref_avg_fq[subpref_id]) + '\n'
                )

    return
def save_data_to_file(data):
    """Sum the per-subprefecture counts in ``data`` and dump the totals.

    ``data[subpref]`` is read for subprefecture ids 0..255 as a mapping from
    a time key (``hr``) to a count.  Counts are added up per key across all
    subprefectures and written as ``hr<TAB>total`` lines to
    total_calls_per_day.tsv.

    Args:
        data: indexable by subprefecture id; each item is dict-like.

    Returns:
        None.
    """
    # Result is not used below; the call is kept in case the reader matters
    # to the caller (e.g. it validates that the input exists).
    num_users = rd.read_in_subpref_num_users()

    total_weekday_pattern = defaultdict(int)
    for subpref in range(256):
        per_subpref = data[subpref]
        for hr in per_subpref:
            total_weekday_pattern[hr] += per_subpref[hr]

    file_name = "/home/sscepano/D4D res/allstuff/call timing/total_calls_per_day.tsv"
    f = open(file_name, "w")
    f.writelines(
        '\t'.join((str(hr), str(total))) + '\n'
        for hr, total in total_weekday_pattern.items()
    )
    f.close()

    return
def from_file_radius_gyr3(file_name, subpref):
    """Plot the distribution of the integer-truncated values of one subprefecture.

    ``file_name`` holds ``user_id<TAB>value`` lines.  Values of users flagged
    with 1 in ``rd.read_in_subpref_users(subpref)`` are truncated to int and
    counted; each count is divided by the subprefecture's user count
    (``rd.read_in_subpref_num_users()[subpref]``) and plotted on log-log
    axes.  The figure is saved as rg_<subpref>.png.

    Nothing is plotted when the subprefecture has no users or when no line
    of the file belongs to one of its users.

    Args:
        file_name: path of the tab-separated input file.
        subpref: subprefecture id.

    Returns:
        None.
    """
    users_list = rd.read_in_subpref_users(subpref)
    total = float(rd.read_in_subpref_num_users()[subpref])

    if not total > 0:
        return

    nits = []
    # Context manager releases the input handle even if a line is malformed.
    with open(file_name, 'r') as f:
        for line in f:
            if not line.strip():
                # Tolerate blank (e.g. trailing) lines instead of failing the unpack.
                continue
            it, nit = line.split('\t')
            nit = float(nit)
            it = int(it)
            if users_list[it] == 1:
                nits.append(int(nit))

    if not nits:
        # min()/max() would raise on an empty sequence; there is nothing to plot.
        return

    mi = min(nits)
    mx = max(nits)
    print("Minimum radius of gyr ", mi)
    print("Maximum radius of gyr ", mx)

    total_nit = float(sum(nits))
    print("Total radius of gyr ", total_nit)

    # Histogram of the truncated values.
    pdf_nits = defaultdict(int)
    for nit in nits:
        pdf_nits[nit] += 1

    nits7s = []
    its7s = []
    # Running sum of the plotted fractions, printed as a sanity check.
    test = 0
    # Keys are unique, so sorting the (value, count) pairs orders by value.
    for value, count in sorted(pdf_nits.items()):
        nits7s.append(count / total)
        test += count / total
        its7s.append(value)

    print(test)

    ############################################################################################################################
    # THIS is to plot number of users pdf
    ############################################################################################################################
    plt.figure(7)

    plt.plot(its7s, nits7s, 'o', linewidth=0.5, label= 'distribution of Rg')

    plt.xlabel('rg [km]')
    plt.ylabel('P(rg)')
    plt.legend()

    # log-log plot; comment out the next two lines for a regular linear plot
    plt.yscale('log')
    plt.xscale('log')

    figure_name = "/home/sscepano/D4D res/allstuff/rg/1/rg_" + str(subpref) + ".png"

    print(figure_name)
    plt.savefig(figure_name, format = "png", dpi=300)

    plt.clf()

    return