Example #1
0
def calculate_traj_subpref_from_usr_file():
    """Average the per-user values of usr_traj_length.tsv per subpref.

    Each non-blank line of the hard-coded input file must hold a user id and
    a numeric value separated by a single TAB.  The value is added to the
    bucket ``usr_home[user]`` (presumably the user's home subprefecture id --
    confirm against ``rd.read_in_user_home_subprefs``).  For every subpref id
    in ``range(256)`` whose entry in ``rd.read_in_subpref_num_users()`` is
    positive, the bucket is divided by that count and written to the
    hard-coded output file as a ``subpref TAB average`` line.

    Returns:
        The ``defaultdict(float)`` of per-subpref values.  Buckets whose user
        count is not positive keep their raw, undivided sum.
    """
    usr_home = rd.read_in_user_home_subprefs()
    subpref_num_users = rd.read_in_subpref_num_users()

    subpref_rg = defaultdict(float)

    file_name = "/home/sscepano/D4D res/ORGANIZED/SET3/Clustering/usr res/usr_traj_length.tsv"
    # 'with' releases the handle even when a line fails to parse.
    with open(file_name, 'r') as f:
        for line in f:
            # A blank (e.g. trailing) line cannot be unpacked into two fields.
            if not line.strip():
                continue
            usr, rg = line.split('\t')
            subpref_rg[usr_home[int(usr)]] += float(rg)

    file_name2 = "/home/sscepano/D4D res/allstuff/CLUSTERING/subpref res/subpref_avg_traj.tsv"
    # Closing the output file guarantees the averages are flushed to disk.
    with open(file_name2, 'w') as f2:
        for subpref in range(256):
            num_users = subpref_num_users[subpref]
            if num_users > 0:
                subpref_rg[subpref] = subpref_rg[subpref] / float(num_users)
                f2.write(str(subpref) + '\t' + str(subpref_rg[subpref]) + '\n')

    return subpref_rg
Example #2
0
def calculate_fq_subpref_from_usr_file():
    """Average the per-user values of Users_and_their_calling_fq.tsv per subpref.

    Each non-blank line of the hard-coded input file must hold a user id and
    a numeric value separated by a single TAB.  The value is added to the
    bucket ``usr_home[user]`` (presumably the user's home subprefecture id --
    confirm against ``rd.read_in_user_home_subprefs``).  For every subpref id
    in ``range(256)`` whose entry in ``rd.read_in_subpref_num_users()`` is
    positive, the bucket is divided by that count and written to the
    hard-coded output file as a ``subpref TAB average`` line.

    Returns:
        The ``defaultdict(float)`` of per-subpref values.  Buckets whose user
        count is not positive keep their raw, undivided sum.
    """
    usr_home = rd.read_in_user_home_subprefs()
    subpref_num_users = rd.read_in_subpref_num_users()

    subpref_fq = defaultdict(float)

    file_name = "/home/sscepano/D4D res/ORGANIZED/SET3/Distr of Num and Fq of Calls/new results -- check the same/Users_and_their_calling_fq.tsv"
    # 'with' releases the handle even when a line fails to parse.
    with open(file_name, 'r') as f:
        for line in f:
            # A blank (e.g. trailing) line cannot be unpacked into two fields.
            if not line.strip():
                continue
            usr, fq = line.split('\t')
            subpref_fq[usr_home[int(usr)]] += float(fq)

    file_name2 = "/home/sscepano/D4D res/ORGANIZED/SET3/Distr of Num and Fq of Calls/subpref/Subpref_avg_calling_fq_check.tsv"
    # Closing the output file guarantees the averages are flushed to disk.
    with open(file_name2, 'w') as f2:
        for subpref in range(256):
            num_users = subpref_num_users[subpref]
            if num_users > 0:
                subpref_fq[subpref] = subpref_fq[subpref] / float(num_users)
                f2.write(str(subpref) + '\t' + str(subpref_fq[subpref]) + '\n')

    return subpref_fq

#calculate_fq_subpref_from_usr_file()
Example #3
0
def calculate_diversity(data):
    """Compute a diversity index per user and its average per subpref.

    For every user id in ``range(500001)`` whose total count
    ``S = sum(data[usr].values())`` is positive, the index is::

        -sum(v * log(v)) / log(S)      with v = data[usr][subpref] / S

    where the sum skips the key ``-1`` and entries with a zero count.  The
    original comment attributes the formula to "Finger on the Pulse".
    Per-user values are only returned, not written to disk.

    For every subpref id in ``range(1, 256)`` with a positive entry in
    ``rd.read_in_subpref_num_users()``, the indexes of the users flagged
    ``1`` by ``rd.read_in_subpref_users(subpref)`` are summed, divided by
    that user count, and written as ``subpref TAB value`` lines to the
    hard-coded output file, whose path is then printed.

    Args:
        data: indexable by user id; each item must offer ``values()``, key
            iteration and key lookup (dict-like).  Presumably maps a visited
            subpref id to a visit count -- confirm against the caller.

    Returns:
        Tuple ``(div, div_subpref)`` of two ``defaultdict(int)`` objects
        holding the per-user and per-subpref indexes.
    """
    div = defaultdict(int)

    for usr in range(500001):
        visits = data[usr]
        # total of all counts, including the one stored under key -1
        S = float(sum(visits.values()))
        if not S > 0:
            continue

        entropy = 0
        for subpref in visits:
            if subpref != -1:
                # share of this key in the user's total
                v = visits[subpref] / S
                # A zero share contributes nothing; math.log(0) would raise.
                if v > 0:
                    entropy -= v * math.log(v)

        # log(1) == 0: a user with a total of exactly 1 used to raise
        # ZeroDivisionError here.  The value is left unnormalised instead.
        log_total = math.log(S)
        if log_total != 0:
            div[usr] = entropy / log_total
        else:
            div[usr] = entropy

    num_subpref_usrs = rd.read_in_subpref_num_users()
    div_subpref = defaultdict(int)
    for subpref in range(1, 256):
        # only subprefs that have users
        if num_subpref_usrs[subpref] > 0:
            subpref_usrs = rd.read_in_subpref_users(subpref)
            # scan every user id and keep the flagged ones (slow but simple)
            for usr in range(500001):
                if subpref_usrs[usr] == 1:
                    div_subpref[subpref] += div[usr]
            # turn the sum into an average over the subpref's user count
            div_subpref[subpref] = div_subpref[subpref] / float(num_subpref_usrs[subpref])

    file_name2 = "/home/sscepano/D4D res/allstuff/diversity of travel/subpref_diversity_index.tsv"
    # 'with' closes the file even if a write fails half-way.
    with open(file_name2, "w") as f2:
        for subpref in range(1, 256):
            if num_subpref_usrs[subpref] > 0:
                f2.write(str(subpref) + '\t' + str(div_subpref[subpref]) + '\n')

    print(file_name2)

    return div, div_subpref
Example #4
0
def save_subpref_visited_pl():
    """Write the per-subpref values of ``rd.read_in_subpref_num_places()`` to a TSV.

    For every subpref id in ``range(256)`` whose entry in
    ``rd.read_in_subpref_num_users()`` is positive, one ``subpref TAB value``
    line is written to the hard-coded output file.

    Returns:
        The object returned by ``rd.read_in_subpref_num_places()``, unchanged.
    """
    subpref_num_users = rd.read_in_subpref_num_users()
    subpref_pl = rd.read_in_subpref_num_places()

    file_name2 = "/home/sscepano/D4D res/allstuff/CLUSTERING/subpref res/subpref_avg_num_visited_pl.tsv"
    # The handle was never closed before; 'with' guarantees the flush.
    with open(file_name2, 'w') as f2:
        for subpref in range(256):
            if subpref_num_users[subpref] > 0:
                f2.write(str(subpref) + '\t' + str(subpref_pl[subpref]) + '\n')

    return subpref_pl
Example #5
0
def recalculate_num_visits_outside():
    """Divide per-subpref visit totals by the subpref's user count and save them.

    Totals come from ``rd.read_in_BACK_num_of_visits_outisde_only()`` and
    user counts from ``rd.read_in_subpref_num_users()``.  For every subpref id
    in ``range(256)`` with a non-zero user count the quotient is printed and
    written as a ``subpref TAB value`` line to the hard-coded output file.

    Returns:
        None.
    """
    total_visits = rd.read_in_BACK_num_of_visits_outisde_only()
    scaled_visits = defaultdict(float)
    num_usrs = rd.read_in_subpref_num_users()

    file_out = "/home/sscepano/D4D res/allstuff/CLUSTERING/num_outside_visits_scaled.tsv"
    # The handle was never closed before; 'with' guarantees the flush.
    with open(file_out, "w") as f:
        for subpref in range(256):
            if num_usrs[subpref] != 0:
                # float() avoids integer truncation when both operands are
                # ints on Python 2, matching the other averaging functions.
                scaled_visits[subpref] = total_visits[subpref] / float(num_usrs[subpref])

                print(scaled_visits[subpref])
                f.write(str(subpref) + '\t' + str(scaled_visits[subpref]) + '\n')

    return
def data_to_files(data):
    """Write per-user sums of ``data`` and their per-subpref aggregates to three TSVs.

    Three hard-coded files are produced and their paths printed:

    1. ``user TAB data[user].sum()`` for every user id in ``range(500001)``.
    2. ``subpref TAB total`` where the total adds up the per-user sums grouped
       by ``usr_home[user]``; zero totals are skipped.
    3. ``subpref TAB total / num_users`` for subprefs with a non-zero user
       count; zero results are skipped.

    Args:
        data: indexable by user id; each item must offer a ``sum()`` method
            (presumably a numpy array per user -- confirm against the caller).

    Returns:
        None.
    """
    num_visited_subprefs_per_subpref = defaultdict(int)
    num_visited_subprefs_per_subpref_scaled = defaultdict(float)
    usr_home = rd.read_in_user_home_subprefs()
    num_usrs = rd.read_in_subpref_num_users()

    file1 = "/home/sscepano/D4D res/allstuff/distr of num of visited subprefs/1/usr_num_visited_subprefs.tsv"
    file2 = "/home/sscepano/D4D res/allstuff/distr of num of visited subprefs/1/subpref_summed_num_visited_subprefs.tsv"
    file3 = "/home/sscepano/D4D res/allstuff/distr of num of visited subprefs/1/subpref_scaled_num_visited_subprefs.tsv"

    # Each per-user sum is computed once and used for both the per-user file
    # and the per-subpref total.  'with' closes every file, which the
    # previous version never did.
    with open(file1, "w") as f1:
        for usr in range(500001):
            visited = data[usr].sum()
            num_visited_subprefs_per_subpref[usr_home[usr]] += visited
            f1.write(str(usr) + '\t' + str(visited) + '\n')

    for subpref in range(256):
        if num_usrs[subpref] != 0:
            # float() keeps the average from being truncated by integer
            # division on Python 2.
            num_visited_subprefs_per_subpref_scaled[subpref] = (
                num_visited_subprefs_per_subpref[subpref] / float(num_usrs[subpref]))

    with open(file2, "w") as f2:
        for subpref in range(256):
            if num_visited_subprefs_per_subpref[subpref] != 0:
                f2.write(str(subpref) + '\t' + str(num_visited_subprefs_per_subpref[subpref]) + '\n')

    with open(file3, "w") as f3:
        for subpref in range(256):
            if num_visited_subprefs_per_subpref_scaled[subpref] != 0:
                f3.write(str(subpref) + '\t' + str(num_visited_subprefs_per_subpref_scaled[subpref]) + '\n')

    print(file1)
    print(file2)
    print(file3)

    return
Example #7
0
def save_data_to_matrix(home_calls, last_usr_loc_n_dist):
    """Aggregate per-user values per subpref and write one TSV row per subpref.

    For every user id in ``range(500001)`` the values ``home_calls[usr][0]``,
    ``home_calls[usr][1]`` and ``last_usr_loc_n_dist[usr][1]`` are added to
    the bucket ``usr_home[usr]``.  For every subpref id in ``range(256)`` a
    row with these TAB-separated columns is written to the hard-coded file:

        subpref id, sum of ``home_calls[..][0]``, sum of ``home_calls[..][1]``,
        first sum divided by the sum of both (0 when both are zero),
        ``last_usr_loc_n_dist[..][1]`` sum divided by the subpref's user
        count, and ``rd.read_in_subpref_avg_fq()[subpref]``.

    Args:
        home_calls: indexable by user id; items are indexable with 0 and 1
            (variable names suggest calls made inside / outside the home
            subpref -- confirm against the caller).
        last_usr_loc_n_dist: indexable by user id; item ``[1]`` is summed
            (presumably a travelled distance -- confirm against the caller).

    Returns:
        None.
    """
    usr_home = rd.read_in_user_home_subprefs()
    subpref_avg_fq = rd.read_in_subpref_avg_fq()
    subpref_num_usr = rd.read_in_subpref_num_users()

    subpref_calls = defaultdict(int)
    subpref_outside_calls = defaultdict(int)
    subpref_pct_inside_calls = defaultdict(int)
    subpref_total_traj = defaultdict(int)

    for usr in range(500001):
        home = usr_home[usr]
        subpref_calls[home] += home_calls[usr][0]
        subpref_outside_calls[home] += home_calls[usr][1]
        subpref_total_traj[home] += last_usr_loc_n_dist[usr][1]

    # diagnostic output: number of distinct keys in each table
    print(len(subpref_outside_calls))
    print(len(subpref_calls))
    print(len(subpref_total_traj))
    print(len(subpref_avg_fq))

    file_o = "/home/sscepano/D4D res/allstuff/CLUSTERING/ALL_SUBPREF_clustering_args.tsv"
    # The handle was never closed before; 'with' guarantees the flush.
    with open(file_o, "w") as f_o:
        for subpref_id in range(256):
            suma = float(subpref_calls[subpref_id] + subpref_outside_calls[subpref_id])
            if suma != 0:
                subpref_pct_inside_calls[subpref_id] = subpref_calls[subpref_id] / suma

            # Guard the average like the other subpref functions do: a
            # subpref without users would otherwise divide by zero.
            num_usr = subpref_num_usr[subpref_id]
            if num_usr != 0:
                subpref_total_traj[subpref_id] = subpref_total_traj[subpref_id] / float(num_usr)

            columns = (
                subpref_id,
                subpref_calls[subpref_id],
                subpref_outside_calls[subpref_id],
                subpref_pct_inside_calls[subpref_id],
                subpref_total_traj[subpref_id],
                subpref_avg_fq[subpref_id],
            )
            f_o.write('\t'.join(str(column) for column in columns) + '\n')

    return
Example #8
0
def save_data_to_file(data):
    """Sum ``data`` over all subprefs per inner key and write the totals.

    For every subpref id in ``range(256)`` and every key ``hr`` obtained by
    iterating ``data[subpref]``, ``data[subpref][hr]`` is added to a running
    total for ``hr``.  The totals are written as ``hr TAB total`` lines, in
    dict iteration order, to the hard-coded ``total_calls_per_day.tsv``.

    Earlier commented-out experiments (cumulative time plots, wake-up/sleep
    hour, share of night calls, one file per subpref) have been removed.  So
    has the unused ``rd.read_in_subpref_num_users()`` call they relied on.

    Args:
        data: indexable by subpref id; each item is iterable over keys and
            indexable by them (presumably call counts per time slot --
            confirm against the caller).

    Returns:
        None.
    """
    total_weekday_pattern = defaultdict(int)

    for subpref in range(256):
        per_subpref = data[subpref]
        for hr in per_subpref:
            total_weekday_pattern[hr] += per_subpref[hr]

    file_name = "/home/sscepano/D4D res/allstuff/call timing/total_calls_per_day.tsv"
    # 'with' closes the file even if a write fails half-way.
    with open(file_name, "w") as f:
        for hr in total_weekday_pattern:
            f.write(str(hr) + '\t' + str(total_weekday_pattern[hr]) + '\n')

    return
Example #9
0
def from_file_radius_gyr3(file_name, subpref):
    """Plot the distribution of per-user values from ``file_name`` for one subpref.

    Each line of ``file_name`` must hold a user id and a numeric value
    separated by a single TAB.  Values of users flagged ``1`` by
    ``rd.read_in_subpref_users(subpref)`` are truncated to ``int`` and
    counted; each count is divided by the subpref's user count taken from
    ``rd.read_in_subpref_num_users()``.  The resulting points are drawn on
    log-log axes (labels ``rg [km]`` / ``P(rg)``) in matplotlib figure 7 and
    saved as a PNG under the hard-coded output directory.

    Nothing is plotted when the subpref has no users or when no line of the
    file belongs to one of its users.

    Args:
        file_name: path of the TAB-separated input file.
        subpref: subpref id; used for the ``rd`` look-ups and in the PNG name.

    Returns:
        None.
    """
    users_list = rd.read_in_subpref_users(subpref)
    total = float(rd.read_in_subpref_num_users()[subpref])

    if not total > 0:
        return

    nits = []
    # 'with' closes the input file, which the previous version never did.
    with open(file_name, 'r') as f:
        for line in f:
            it, nit = line.split('\t')
            if users_list[int(it)] == 1:
                # truncate to whole units so equal values share one bin
                nits.append(int(float(nit)))

    if not nits:
        # min()/max() below would raise ValueError on an empty list.
        print("No radius of gyr values for subpref " + str(subpref))
        return

    print("Minimum radius of gyr " + str(min(nits)))
    print("Maximum radius of gyr " + str(max(nits)))
    print("Total radius of gyr " + str(float(sum(nits))))

    # histogram: truncated value -> number of users
    pdf_nits = defaultdict(int)
    for nit in nits:
        pdf_nits[nit] += 1

    # x values in ascending order, y values as share of the subpref's users
    its7s = sorted(pdf_nits)
    nits7s = [pdf_nits[rg] / total for rg in its7s]

    # Sanity check: sum of all shares (1.0 when every user of the subpref
    # appears in the file).
    test = sum(nits7s)
    print(test)

    plt.figure(7)

    plt.plot(its7s, nits7s, 'o', linewidth=0.5, label= 'distribution of Rg')

    plt.xlabel('rg [km]')
    plt.ylabel('P(rg)')
    plt.legend()

    # log-log axes; drop these two calls for a linear plot
    plt.yscale('log')
    plt.xscale('log')
    figure_name = "/home/sscepano/D4D res/allstuff/rg/1/rg_" + str(subpref) + ".png"

    print(figure_name)
    plt.savefig(figure_name, format = "png", dpi=300)

    # clear the figure so the next call starts from an empty canvas
    plt.clf()

    return