def calculate_diversity(data):
    """Compute travel-diversity indexes per user and per subprefecture.

    User diversity: for every user id in ``range(500001)`` the visit counts
    in ``data[usr]`` are turned into a normalised entropy,
    ``-sum(v * log(v)) / log(S)``, where ``S`` is the sum of all of the
    user's counts and ``v = count / S`` for one subprefecture.  Counts stored
    under the key ``-1`` are included in ``S`` but contribute no entropy
    term.  Users whose counts sum to zero are skipped.

    Subprefecture diversity: for every subprefecture id in ``range(1, 256)``
    that has users according to ``rd.read_in_subpref_num_users()``, the
    diversity of each user flagged ``1`` by ``rd.read_in_subpref_users`` is
    summed and divided by that subprefecture's user count.  The result is
    written as ``<subpref>\\t<index>`` lines to a hard-coded TSV path, and
    the path is printed.

    Args:
        data: indexable by user id; each ``data[usr]`` maps a subprefecture
            id to a visit count.

    Returns:
        A ``(div, div_subpref)`` tuple of ``defaultdict(int)`` objects keyed
        by user id and by subprefecture id respectively.
    """
    num_users = 500001
    subpref_ids = range(1, 256)

    ##################################################
    ### this part calculates user diversity
    ##################################################
    div = defaultdict(int)

    # for each user calculate diversity using formula from Finger on the Pulse
    for usr in range(num_users):
        visits = data[usr]
        # total places visits
        S = float(sum(visits.values()))
        if S > 0:
            entropy = 0.0
            for subpref in visits:
                count = visits[subpref]
                # A zero count adds nothing to the entropy (0 * log 0 is
                # taken as 0) and would otherwise make math.log raise.
                if subpref == -1 or count == 0:
                    continue
                # frequency of visits to this place
                v = count / S
                entropy -= v * math.log(v)
            # log(S) is 0 when the user has a single visit; there is no
            # diversity to normalise then, so avoid dividing by zero.
            log_total = math.log(S)
            div[usr] = entropy / log_total if log_total != 0 else 0.0

    ################################################################
    ### subpref diversity
    ################################################################
    num_subpref_usrs = rd.read_in_subpref_num_users()

    div_subpref = defaultdict(int)
    for subpref in subpref_ids:
        # for places where we have users
        if num_subpref_usrs[subpref] > 0:
            # read ids of those users
            subpref_usrs = rd.read_in_subpref_users(subpref)
            # for each user check if he belongs here
            # (not the fastest way but still fine)
            for usr in range(num_users):
                if subpref_usrs[usr] == 1:
                    # we sum all users diversity indexes and then avg
                    div_subpref[subpref] += div[usr]
            div_subpref[subpref] = (
                div_subpref[subpref] / float(num_subpref_usrs[subpref])
            )

    file_name2 = "/home/sscepano/D4D res/allstuff/diversity of travel/subpref_diversity_index.tsv"
    # The context manager closes the file even if a write fails.
    with open(file_name2, "w") as f2:
        for subpref in subpref_ids:
            if num_subpref_usrs[subpref] > 0:
                f2.write(str(subpref) + '\t' + str(div_subpref[subpref]) + '\n')

    print(file_name2)

    return div, div_subpref
def from_file_radius_gyr3(file_name, subpref):
    """Plot the radius-of-gyration distribution of one subprefecture's users.

    Reads ``file_name`` line by line; each line must hold two tab-separated
    fields, an integer user id and a numeric radius of gyration.  Radii of
    users flagged ``1`` in ``rd.read_in_subpref_users(subpref)`` are
    truncated to integers and counted per distinct value.  Each count is
    divided by the subprefecture's user total from
    ``rd.read_in_subpref_num_users()`` and plotted against the radius on
    log-log axes.  The figure is saved as a PNG under a hard-coded directory.

    The minimum, maximum and sum of the selected radii, the sum of the
    plotted fractions, and the figure path are printed along the way.

    Nothing is plotted when the subprefecture has no users, or when no line
    of the file belongs to one of its users.

    Args:
        file_name: path of the tab-separated input file.
        subpref: subprefecture id, used to select users and name the figure.

    Returns:
        None.
    """
    users_list = rd.read_in_subpref_users(subpref)
    total = float(rd.read_in_subpref_num_users()[subpref])

    if not total > 0:
        return

    # Collect the truncated radius of every user of this subprefecture.
    nits = []
    # The context manager guarantees the input file is closed.
    with open(file_name, 'r') as f:
        for line in f:
            it, nit = line.split('\t')
            # Convert both fields up front so malformed lines still fail
            # loudly, whichever user they belong to.
            nit = float(nit)
            it = int(it)
            if users_list[it] == 1:
                nits.append(int(nit))

    # min()/max() raise on an empty sequence; with no matching users there
    # is nothing to report or plot.
    if not nits:
        return

    mi = min(nits)
    mx = max(nits)
    print("Minimum radius of gyr ", mi)
    print("Maximum radius of gyr ", mx)

    total_nit = float(sum(nits))
    print("Total radius of gyr ", total_nit)

    # Number of users per distinct (integer) radius.
    pdf_nits = defaultdict(int)
    for nit in nits:
        pdf_nits[nit] += 1

    # x values: distinct radii in ascending order; y values: share of the
    # subprefecture's users having that radius.
    its7s = sorted(pdf_nits)
    nits7s = [pdf_nits[rg] / total for rg in its7s]

    # Sum of the plotted fractions, printed as a sanity check.
    print(sum(nits7s))

    ############################################################################
    # THIS is to plot number of users pdf
    ############################################################################
    plt.figure(7)

    plt.plot(its7s, nits7s, 'o', linewidth=0.5, label= 'distribution of Rg')

    plt.xlabel('rg [km]')
    plt.ylabel('P(rg)')
    plt.legend()

    # Log-log plot; comment out the next two lines for a regular linear plot.
    plt.yscale('log')
    plt.xscale('log')
    figure_name = "/home/sscepano/D4D res/allstuff/rg/1/rg_" + str(subpref) + ".png"

    print(figure_name)
    plt.savefig(figure_name, format = "png", dpi=300)

    plt.clf()

    return