예제 #1
0
def calculate_fq_subpref_from_usr_file():
    """Average the per-user calling frequency over each user's home subpref.

    Reads a tab-separated ``user<TAB>frequency`` file, adds every user's
    frequency to the subpref that ``rd.read_in_user_home_subprefs()`` maps
    the user to, divides each sum by the matching user count from
    ``rd.read_in_subpref_num_users()`` and writes one
    ``subpref<TAB>average`` line for every subpref id in ``range(256)``
    whose user count is positive.

    Returns:
        A ``defaultdict(float)`` keyed by subpref id. Entries whose user
        count is not positive keep their undivided sum.

    Raises:
        IOError/OSError: if either file cannot be opened.
        ValueError: if a non-blank row is not exactly two tab-separated
            numeric fields.
    """
    usr_home = rd.read_in_user_home_subprefs()
    subpref_num_users = rd.read_in_subpref_num_users()

    subpref_fq = defaultdict(float)

    file_name = "/home/sscepano/D4D res/ORGANIZED/SET3/Distr of Num and Fq of Calls/new results -- check the same/Users_and_their_calling_fq.tsv"
    # The context manager closes the input even when a row fails to parse.
    with open(file_name, 'r') as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a stray newline at end of file)
                # instead of failing on the two-field unpack below.
                continue
            usr, fq = line.split('\t')
            subpref_fq[usr_home[int(usr)]] += float(fq)

    # Turn the per-subpref sums into per-user averages.
    for subpref in range(256):
        if subpref_num_users[subpref] > 0:
            subpref_fq[subpref] = subpref_fq[subpref] / float(subpref_num_users[subpref])

    file_name2 = "/home/sscepano/D4D res/ORGANIZED/SET3/Distr of Num and Fq of Calls/subpref/Subpref_avg_calling_fq_check.tsv"
    # Closing the output explicitly guarantees the rows are flushed to disk
    # before the function returns.
    with open(file_name2, 'w') as f2:
        for subpref in range(256):
            if subpref_num_users[subpref] > 0:
                f2.write(str(subpref) + '\t' + str(subpref_fq[subpref]) + '\n')

    return subpref_fq

#calculate_fq_subpref_from_usr_file()
예제 #2
0
def calculate_traj_subpref_from_usr_file():
    """Average a per-user value from ``usr_traj_length.tsv`` per home subpref.

    Reads a tab-separated ``user<TAB>value`` file, adds every user's value to
    the subpref that ``rd.read_in_user_home_subprefs()`` maps the user to,
    divides each sum by the matching user count from
    ``rd.read_in_subpref_num_users()`` and writes one
    ``subpref<TAB>average`` line for every subpref id in ``range(256)``
    whose user count is positive.

    NOTE(review): judging by the file names the value is a trajectory length,
    although the local variables are called ``rg`` -- confirm with the
    producer of the input file.

    Returns:
        A ``defaultdict(float)`` keyed by subpref id. Entries whose user
        count is not positive keep their undivided sum.

    Raises:
        IOError/OSError: if either file cannot be opened.
        ValueError: if a non-blank row is not exactly two tab-separated
            numeric fields.
    """
    usr_home = rd.read_in_user_home_subprefs()
    subpref_num_users = rd.read_in_subpref_num_users()

    subpref_rg = defaultdict(float)

    file_name = "/home/sscepano/D4D res/ORGANIZED/SET3/Clustering/usr res/usr_traj_length.tsv"
    # The context manager closes the input even when a row fails to parse.
    with open(file_name, 'r') as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a stray newline at end of file)
                # instead of failing on the two-field unpack below.
                continue
            usr, rg = line.split('\t')
            subpref_rg[usr_home[int(usr)]] += float(rg)

    # Turn the per-subpref sums into per-user averages.
    for subpref in range(256):
        if subpref_num_users[subpref] > 0:
            subpref_rg[subpref] = subpref_rg[subpref] / float(subpref_num_users[subpref])

    file_name2 = "/home/sscepano/D4D res/allstuff/CLUSTERING/subpref res/subpref_avg_traj.tsv"
    # Closing the output explicitly guarantees the rows are flushed to disk
    # before the function returns.
    with open(file_name2, 'w') as f2:
        for subpref in range(256):
            if subpref_num_users[subpref] > 0:
                f2.write(str(subpref) + '\t' + str(subpref_rg[subpref]) + '\n')

    return subpref_rg
예제 #3
0
def data_to_files(data):
    """Write per-user and per-subpref sums of ``data`` to three TSV files.

    Args:
        data: indexable by user id for every id in ``range(500001)``; each
            element must expose ``.sum()`` and be accepted by ``n.sum``.
            NOTE(review): presumably a per-user array of visited-subpref
            flags and ``n`` is numpy imported at module level -- confirm.

    Files written (paths are hard-coded below):
        * file1: ``user<TAB>sum of data[user]`` for every user.
        * file2: ``subpref<TAB>total`` -- the user sums accumulated over the
          subpref ``rd.read_in_user_home_subprefs()`` assigns to each user;
          only non-zero totals are written.
        * file3: ``subpref<TAB>total / number of users`` using the counts
          from ``rd.read_in_subpref_num_users()``; only non-zero values are
          written.

    The three paths are printed once the files have been written.

    Returns:
        None.
    """
    num_visited_subprefs_per_subpref = defaultdict(int)
    num_visited_subprefs_per_subpref_scaled = defaultdict(int)
    usr_home = rd.read_in_user_home_subprefs()
    num_usrs = rd.read_in_subpref_num_users()

    for usr in range(500001):
        num_visited_subprefs_per_subpref[usr_home[usr]] += data[usr].sum()

    for subpref in range(256):
        if num_usrs[subpref] != 0:
            # float() forces true division so integer totals are not
            # floor-divided under Python 2.
            num_visited_subprefs_per_subpref_scaled[subpref] = (
                num_visited_subprefs_per_subpref[subpref] / float(num_usrs[subpref]))

    file1 = "/home/sscepano/D4D res/allstuff/distr of num of visited subprefs/1/usr_num_visited_subprefs.tsv"
    file2 = "/home/sscepano/D4D res/allstuff/distr of num of visited subprefs/1/subpref_summed_num_visited_subprefs.tsv"
    file3 = "/home/sscepano/D4D res/allstuff/distr of num of visited subprefs/1/subpref_scaled_num_visited_subprefs.tsv"

    # Each output is written inside its own context manager so the handle is
    # closed (and the rows flushed) even if a later step raises.
    with open(file1, "w") as f1:
        for usr in range(500001):
            f1.write(str(usr) + '\t' + str(n.sum(data[usr])) + '\n')

    with open(file2, "w") as f2:
        for subpref in range(256):
            if num_visited_subprefs_per_subpref[subpref] != 0:
                f2.write(str(subpref) + '\t' + str(num_visited_subprefs_per_subpref[subpref]) + '\n')

    with open(file3, "w") as f3:
        for subpref in range(256):
            if num_visited_subprefs_per_subpref_scaled[subpref] != 0:
                f3.write(str(subpref) + '\t' + str(num_visited_subprefs_per_subpref_scaled[subpref]) + '\n')

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print(file1)
    print(file2)
    print(file3)
예제 #4
0
def save_data_to_matrix(home_calls, last_usr_loc_n_dist):
    """Aggregate per-user statistics per home subpref and dump them as TSV.

    Args:
        home_calls: indexable by user id for every id in ``range(500001)``;
            element ``[0]`` is accumulated into the subpref's call count and
            element ``[1]`` into its "outside" call count.
            NOTE(review): presumably calls made inside vs. outside the home
            subpref -- confirm with the caller.
        last_usr_loc_n_dist: indexable by user id; element ``[1]`` is
            accumulated into the subpref's trajectory total.

    For every subpref id in ``range(256)`` one tab-separated row is written
    with the columns: id, call count, outside call count,
    ``calls / (calls + outside)`` (0 when both are zero), trajectory total
    divided by the user count from ``rd.read_in_subpref_num_users()``, and
    the value ``rd.read_in_subpref_avg_fq()`` holds for that id.

    Before writing, the sizes of the three accumulators and of the average
    frequency table are printed.

    Returns:
        None.
    """
    usr_home = rd.read_in_user_home_subprefs()
    subpref_avg_fq = rd.read_in_subpref_avg_fq()
    subpref_num_usr = rd.read_in_subpref_num_users()

    subpref_calls = defaultdict(int)
    subpref_outside_calls = defaultdict(int)
    subpref_pct_inside_calls = defaultdict(int)
    subpref_total_traj = defaultdict(int)

    for usr in range(500001):
        home = usr_home[usr]
        subpref_calls[home] += home_calls[usr][0]
        subpref_outside_calls[home] += home_calls[usr][1]
        subpref_total_traj[home] += last_usr_loc_n_dist[usr][1]

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print(len(subpref_outside_calls))
    print(len(subpref_calls))
    print(len(subpref_total_traj))
    print(len(subpref_avg_fq))

    file_o = "/home/sscepano/D4D res/allstuff/CLUSTERING/ALL_SUBPREF_clustering_args.tsv"
    # Open the output only after the aggregation succeeded, and let the
    # context manager close (and flush) it.
    with open(file_o, "w") as f_o:
        for subpref_id in range(256):
            suma = float(subpref_calls[subpref_id] + subpref_outside_calls[subpref_id])
            if suma != 0:
                subpref_pct_inside_calls[subpref_id] = subpref_calls[subpref_id] / suma

            num_usr = subpref_num_usr[subpref_id]
            if num_usr != 0:
                # float() forces true division under Python 2; the guard
                # leaves the total untouched for a subpref with no users
                # instead of dividing by zero.
                subpref_total_traj[subpref_id] = subpref_total_traj[subpref_id] / float(num_usr)

            columns = (
                subpref_id,
                subpref_calls[subpref_id],
                subpref_outside_calls[subpref_id],
                subpref_pct_inside_calls[subpref_id],
                subpref_total_traj[subpref_id],
                subpref_avg_fq[subpref_id],
            )
            f_o.write('\t'.join(str(value) for value in columns) + '\n')