def from_file_home_outside_calls(): file1 = "/home/sscepano/D4D res/ORGANIZED/SET3/Clustering/usr res/usr_home_calls.tsv" file2 = "/home/sscepano/D4D res/ORGANIZED/SET3/Clustering/usr res/usr_outside_calls.tsv" usr_home_c = n.zeros(500001) usr_outside_c = n.zeros(500001) i = 0 f1 = open(file1, 'r') f2 = open(file2, 'r') # read the file1 for line in f1: i = i + 1 u, home_c = line.split('\t') home_c = float(home_c) u = int(u) usr_home_c[u] = home_c # read the file for line in f2: i = i + 1 u, outside_c = line.split('\t') outside_c = float(outside_c) u = int(u) usr_outside_c[u] = outside_c ############################################################################################################################ # THIS is to plot pdf of home calls ############################################################################################################################ fig1 = plt.figure(1) ax = fig1.add_subplot(211) nn, bins, rectangles = ax.hist(usr_home_c, 100, normed=True) #plt.plot(nc_distr_pct, 'ro', linewidth=0.5, label= 'pdf Num of calls') plt.xlabel('NcH, number of calls from the home subprefecture') plt.ylabel('P(NcH)') plt.legend() # # this is if we want loglog lot, otheriwse comment and uncomment next line for regular plot file plt.yscale('log') plt.xscale('log') # figure_name = "/home/sscepano/D4D res/allstuff/rg/pdf rg loglog.png" #this is a regular plot file, then comment the previous loglog block #figure_name = "/home/sscepano/D4D res/allstuff/rg/pdf home calls.png" #print(figure_name) #plt.savefig(figure_name, format = "png") ############################################################################################################################ # THIS is to plot pdf of outside calls ############################################################################################################################ #fig1 = plt.figure(1) ax = fig1.add_subplot(212) nn, bins, rectangles = ax.hist(usr_outside_c, 100, normed=True) #plt.plot(nc_distr_pct, 'ro', linewidth=0.5, label= 'pdf Num of calls') plt.xlabel('NcO, number of calls from outside the home subprefecture') plt.ylabel('P(NcO)') plt.legend() # # this is if we want loglog lot, otheriwse comment and uncomment next line for regular plot file plt.yscale('log') plt.xscale('log') figure_name = "/home/sscepano/D4D res/allstuff/rg/pdf home outside calls.png" # #this is a regular plot file, then comment the previous loglog block # figure_name = "/home/sscepano/D4D res/allstuff/rg/pdf home outside calls.png" print(figure_name) plt.savefig(figure_name, format = "png") return # invoke the function for plotting number of calls and frequency probability distribution (percents of users) #from_file_home_outside_calls()
def from_file_num_calls(file_name): # here we store the num of calls made by a user usr_and_his_num_calls = n.zeros(500001, dtype=n.int) # here we store the fq of calls made by a user, but we calculate it from the num_calls fq_distr = n.zeros(max_num_calls) # from the previous array obtained by counting users who made the same total number of calls nc_distr = n.zeros(max_num_calls, dtype=n.int) # here we just save percents of users nc_distr_pct = n.zeros(max_num_calls) # a loop where we populate those two arrays from the file i = 0 f = open(file_name, 'r') # read the file for line in f: i = i + 1 u, nc = line.split('\t') nc = int(nc) u = int(u) usr_and_his_num_calls[u] = nc nc_distr[nc] += 1 mi = min(usr_and_his_num_calls) mx = max(usr_and_his_num_calls) print("Minimum number of calls ", mi) print("Maximum number of calls ", mx) total_u = float(sum(nc_distr)) print("Total users found ", total_u) # test_file_out = "/home/sscepano/D4D res/allstuff/SET3 frequent callers from python/1/Obtained_num_calls_and_its_pct.tsv" # fto = open(test_file_out,"w") for j in range(0, max_num_calls): nc_distr_pct[j] = (nc_distr[j] / total_u) # fto.write(str(j) + '\t' + str(nc_distr_pct[j]) + '\n') # # I was just checking that the total percents sums up to 100 and they do but it looked funny as we have so small values # total = 0 # for i in range (max_num_calls): # total += percent_users[i] # print ("Check ", total) ############################################################################################################################ # THIS is to plot number of users pdf ############################################################################################################################ plt.figure(1) plt.plot(nc_distr_pct, 'r', linewidth=0.5, label= 'distribution of N') plt.xlabel('N, num of calls') plt.ylabel('% Users') plt.legend() # # this is if we want loglog lot, otheriwse comment and uncomment next line for regular plot file plt.yscale('log') plt.xscale('log') # figure_name = "/home/sscepano/D4D res/allstuff/SET3 frequent callers from python/1/SET3 distr of num of calls loglog.png" #this is a regular plot file, then comment the previous loglog block figure_name = "/home/sscepano/D4D res/allstuff/SET3 frequent callers from python/1/distr of num of calls2.png" print(figure_name) plt.savefig(figure_name, format = "png", pdi=300) #plt.show() ############################################################################################################################### # THIS is to plot fq pdf ############################################################################################################################### plt.figure(2) # fq = [] # # for j in range(max_num_calls): # fq.append( float(j / 3360.0)) # # ffq = [] # # for j in range(max_num_calls): # ffq.append(nc_distr_pct[j]) # for j in range(0, max_num_calls): # nc_distr_pct[j] = (nc_distr[j] / total_u) fq = [] for j in range(max_num_calls): fq.append( float(j / 3360.0)) ffq = [] for j in range(max_num_calls): ffq.append(nc_distr_pct[j]) # test_file_out2 = "/home/sscepano/D4D res/allstuff/SET3 frequent callers from python/1/Calculated_fq_calls_and_its_pct2.tsv" # fto2 = open(test_file_out2,"w") # for j in range(0, max_num_calls): # fto2.write(str(fq[j]) + '\t' + str(ffq[j]) + '\n') # Finally understood here -- when I give two arrays: x, y (at least append values IN ORDER like here) -- pyplot will plot y versus x plt.plot(fq, ffq, 'g', linewidth=0.3, label= 'distribution of Fq') plt.xlabel('Fq of calls') plt.ylabel('% Users') plt.legend() # this is if we want loglog lot, otheriwse comment and uncomment next line for regular plot file plt.yscale('log') plt.xscale('log') figure_name = "/home/sscepano/D4D res/allstuff/SET3 frequent callers from python/1/distr of fq of calls2.png" # # this is a regular plot file, then comment the previous loglog block # figure_name = "/home/sscepano/D4D res/allstuff/SET3 frequent callers from python/1/SET3 distr of fq of calls.png" print(figure_name) plt.savefig(figure_name, format = "png") return
def save_to_plot(avg_usr_traj): nits = [] its = [] # a loop where we populate those two arrays from the file i = 0 # f = open(file_name, 'r') # # read the file for usr in range(500001): i = i + 1 nits.append(int(avg_usr_traj[usr])) its.append(usr) mi = min(nits) mx = max(nits) print("Minimum radius of gyr ", mi) print("Maximum radius of gyr ", mx) total_nit = float(sum(nits)) print("Total radius of gyr ", total_nit) pdf_nits = defaultdict(int) for j in range(0, len(nits)): pdf_nits[nits[j]] += 1 ordered = OrderedDict(sorted(pdf_nits.items(), key=lambda t: t[0])) nits7s = [] its7s = [] test = 0 for j in ordered.iterkeys(): nits7s.append(ordered[j]/500000.0) test += ordered[j]/500000.0 its7s.append(j) print test ############################################################################################################################ # THIS is to plot number of users pdf ############################################################################################################################ plt.figure(7) plt.plot(its7s, nits7s, 'o', linewidth=0.5, label= 'distribution of Rg') plt.xlabel('rg [km]') plt.ylabel('P(rg)') plt.legend() # this is if we want loglog lot, otheriwse comment and uncomment next line for regular plot file plt.yscale('log') plt.xscale('log') figure_name = "/home/sscepano/D4D res/allstuff/traj/avg daily/avg_daily_traj_total.png" print(figure_name) plt.savefig(figure_name, format = "png", dpi=300) return
def from_file_radius_gyr3(file_name, subpref): users_list = rd.read_in_subpref_users(subpref) total = float(rd.read_in_subpref_num_users()[subpref]) if total > 0: nits = [] its = [] # a loop where we populate those two arrays from the file i = 0 f = open(file_name, 'r') # read the file for line in f: i = i + 1 it, nit = line.split('\t') nit = float(nit) it = int(it) if users_list[it] == 1: nit = int(nit) nits.append(nit) its.append(it) mi = min(nits) mx = max(nits) print("Minimum radius of gyr ", mi) print("Maximum radius of gyr ", mx) total_nit = float(sum(nits)) print("Total radius of gyr ", total_nit) pdf_nits = defaultdict(int) for j in range(0, len(nits)): pdf_nits[nits[j]] += 1 ordered = OrderedDict(sorted(pdf_nits.items(), key=lambda t: t[0])) nits7s = [] its7s = [] test = 0 #total = 500000.0 for j in ordered.iterkeys(): nits7s.append(ordered[j]/total) test += ordered[j]/total its7s.append(j) print test ############################################################################################################################ # THIS is to plot number of users pdf ############################################################################################################################ plt.figure(7) plt.plot(its7s, nits7s, 'o', linewidth=0.5, label= 'distribution of Rg') plt.xlabel('rg [km]') plt.ylabel('P(rg)') plt.legend() # this is if we want loglog lot, otheriwse comment and uncomment next line for regular plot file plt.yscale('log') plt.xscale('log') figure_name = "/home/sscepano/D4D res/allstuff/rg/1/rg_" + str(subpref) + ".png" print(figure_name) plt.savefig(figure_name, format = "png", dpi=300) plt.clf() return
def from_file_radius_gyr(file_name): usr_rg = nn.zeros(500001) # a loop where we populate those two arrays from the file i = 0 f = open(file_name, 'r') # read the file for line in f: i = i + 1 u, rg = line.split('\t') rg = float(rg) u = int(u) usr_rg[u] = rg mi = min(usr_rg) mx = max(usr_rg) print("Minimum number of calls ", mi) print("Maximum number of calls ", mx) # total_u = float(sum(nc_distr)) # print("Total users found ", total_u) # test_file_out = "/home/sscepano/D4D res/allstuff/SET3 frequent callers from python/1/Obtained_num_calls_and_its_pct.tsv" # fto = open(test_file_out,"w") # for j in range(0, max_num_calls): # nc_distr_pct[j] = (nc_distr[j] / total_u) * 100 # fto.write(str(j) + '\t' + str(nc_distr_pct[j]) + '\n') # # I was just checking that the total percents sums up to 100 and they do but it looked funny as we have so small values # total = 0 # for i in range (max_num_calls): # total += percent_users[i] # print ("Check ", total) ############################################################################################################################ # THIS is to plot number of users pdf ############################################################################################################################ fig1 = plt.figure(1) ax = fig1.add_subplot(111) n, bins, rectangles = ax.hist(usr_rg, 100, normed=True) #plt.plot(nc_distr_pct, 'ro', linewidth=0.5, label= 'pdf Num of calls') plt.xlabel('rg [km]') plt.ylabel('P(rg)') plt.legend() # # this is if we want loglog lot, otheriwse comment and uncomment next line for regular plot file plt.yscale('log') plt.xscale('log') figure_name = "/home/sscepano/D4D res/allstuff/rg/pdf rg loglog.png" # #this is a regular plot file, then comment the previous loglog block # figure_name = "/home/sscepano/D4D res/allstuff/rg/pdf rg.png" print(figure_name) plt.savefig(figure_name, format = "png") ################################################################################################################################ ## THIS is to plot fq pdf ################################################################################################################################ # # plt.figure(2) # # fq = [] # # for j in range(max_num_calls): # fq.append( float(j / 3360.0)) # # ffq = [] # # for j in range(max_num_calls): # ffq.append(nc_distr_pct[j]) # # ## test_file_out2 = "/home/sscepano/D4D res/allstuff/SET3 frequent callers from python/1/Calculated_fq_calls_and_its_pct2.tsv" ## fto2 = open(test_file_out2,"w") ## for j in range(0, max_num_calls): ## fto2.write(str(fq[j]) + '\t' + str(ffq[j]) + '\n') # # # # Finally understood here -- when I give two arrays: x, y (at least append values IN ORDER like here) -- pyplot will plot y versus x # plt.plot(fq, ffq, 'g.', linewidth=0.3, label= 'pdf fq of calls') # # plt.xlabel('fq of calls') # plt.ylabel('% Users') # plt.legend() # # # this is if we want loglog lot, otheriwse comment and uncomment next line for regular plot file # plt.yscale('log') # plt.xscale('log') # figure_name = "/home/sscepano/D4D res/allstuff/SET3 frequent callers from python/1/SET3 distr of fq of calls loglog.png" # ## # this is a regular plot file, then comment the previous loglog block ## figure_name = "/home/sscepano/D4D res/allstuff/SET3 frequent callers from python/1/SET3 distr of fq of calls.png" # # print(figure_name) # plt.savefig(figure_name, format = "png") return
def from_files_usr_movements(): file_name1 = "/home/sscepano/D4D res/allstuff/USER GRAPHS stats/user_number_of_edges_v1.tsv" file_name2 = "/home/sscepano/D4D res/allstuff/USER GRAPHS stats/user_number_of_nodes_v1.tsv" file_name3 = "/home/sscepano/D4D res/allstuff/USER GRAPHS stats/user_number_of_displacements_v1.tsv" nits = [] its = [] # a loop where we populate those two arrays from the file i = 0 f = open(file_name3, "r") # read the file for line in f: i = i + 1 it, nit = line.split("\t") nit = int(nit) it = int(it) nit = int(nit) nits.append(nit) its.append(it) mi = min(nits) mx = max(nits) print ("Minimum # edges ", mi) print ("Maximum # edges ", mx) total_nit = float(sum(nits)) print ("Total # edges ", total_nit) pdf_nits = defaultdict(int) for j in range(0, len(nits)): pdf_nits[nits[j]] += 1 ordered = OrderedDict(sorted(pdf_nits.items(), key=lambda t: t[0])) nits7s = [] its7s = [] test = 0 for j in ordered.iterkeys(): nits7s.append(ordered[j] / 500000.0) test += ordered[j] / 500000.0 its7s.append(j) print test ############################################################################################################################ # THIS is to plot number of users pdf ############################################################################################################################ plt.figure(7) plt.plot(its7s, nits7s, "o", linewidth=0.5, label="distr. of # displacements ") plt.xlabel("# displacements") plt.ylabel("P(# displacements)") plt.legend() # this is if we want loglog lot, otheriwse comment and uncomment next line for regular plot file plt.yscale("log") plt.xscale("log") # figure_name1 = "/home/sscepano/D4D res/allstuff/USER GRAPHS stats/usr_num_edges.png" # figure_name2 = "/home/sscepano/D4D res/allstuff/USER GRAPHS stats/usr_num_nodes.png" figure_name3 = "/home/sscepano/D4D res/allstuff/USER GRAPHS stats/usr_num_displacements.png" print (figure_name3) plt.savefig(figure_name3, format="png", dpi=300) return