def write_wifi_features_for_knn():
    """Append one wifi-feature CSV per label under for_knn/wifi_features.

    For every i in 1..15 the target mapping is taken from ``get_y(i)`` and
    the column/file name from ``LABELS[i - 1]``.  Each file
    ``<cur_dir>/data/matrix_data/for_knn/wifi_features/<label>.csv`` receives
    a header row (``subject_id``, the three feature names, the label)
    followed by one row per entry of the module-level ``ids``.  The last
    column is ``str(id_y[id])`` when the id is present in the mapping and an
    empty string otherwise.

    The files are opened in append mode, so running this function again adds
    a second header and a second set of rows to any existing file.

    Raises:
        KeyError: if an id from ``ids`` is missing from a feature table.
    """
    features = ['edit_dist', 'start_time_var', 'end_time_var']
    # read_feature() is called with the feature name only, so the tables are
    # loaded once here instead of once per label.
    all_features = {feature: read_feature(feature) for feature in features}

    for i in range(1, 16):
        # The second value returned by get_y() is not used; the label that
        # names the file and the last column comes from LABELS.
        id_y, _ = get_y(i)
        label = LABELS[i - 1]
        output_fp = os.path.join(cur_dir, 'data', 'matrix_data', 'for_knn',
                                 'wifi_features', label + '.csv')
        header = ['subject_id'] + features + [label]

        # `with` guarantees the handle is closed even if a lookup below raises.
        with open(output_fp, 'a') as fw:
            fw.write(','.join(header) + '\n')
            for subject_id in ids:
                # NOTE(review): ids and feature values are joined as-is, so
                # they are presumably already strings -- confirm.
                row = [subject_id]
                row.extend(all_features[feature][subject_id]
                           for feature in features)
                row.append(str(id_y[subject_id]) if subject_id in id_y else '')
                fw.write(','.join(row) + '\n')
def plot(result):
    """Show a scatter plot of ``result`` against the targets from ``get_y(3)``.

    The y value for each entry of the module-level ``WIFI_ID_HOME`` is looked
    up in the mapping returned by ``get_y(3)`` under the key
    ``str(int(entry))``.  ``result`` is passed to ``plt.scatter`` as the x
    values, so it is presumably ordered like ``WIFI_ID_HOME`` -- confirm
    against the caller.

    Args:
        result: x values handed unchanged to ``plt.scatter``.

    Raises:
        KeyError: if a normalised id is missing from the target mapping.
    """
    # Only the mapping is needed; the second value returned by get_y() is unused.
    targets, _ = get_y(3)
    ys = [targets[str(int(home_id))] for home_id in WIFI_ID_HOME]
    plt.scatter(result, ys)
    plt.show()
def write_histogram_to_csv():
    """Append location-frequency rows per label under for_knn/freq_histogram.

    For every i in 1..15 the target mapping is taken from ``get_y(i)`` and
    the file name from ``LABELS[i - 1]``.  Every ``*.csv`` file in
    ``<cur_dir>/data/gps_osm`` is then processed: the subject id is the last
    two characters of the file name before its first dot.  Subjects with
    fewer than 30 complete days, or with no entry in the target mapping, are
    skipped.  For the rest, the first ``NUM_DAYS`` complete days are turned
    into a frequency mapping (``get_loc_freq`` -> ``merge_homes`` ->
    ``get_all_loc_freq``), whose values, ordered by key, form the row; the
    target value is appended as the last column.

    Rows go to
    ``<cur_dir>/data/matrix_data/for_knn/freq_histogram/<label>.csv`` in
    append mode, so repeated runs accumulate rows in existing files.  No
    header row is written.  Progress information is printed to stdout.
    """
    # Same directory for every label, so build the path once.
    addr_dir = os.path.join(cur_dir, 'data', 'gps_osm')

    for i in range(1, 16):
        # Single-argument parenthesised print produces the same output under
        # the Python 2 print statement and the Python 3 print function.
        print(i)
        # The second value returned by get_y() is not used; the file name
        # comes from LABELS.
        id_y, _ = get_y(i)
        label = LABELS[i - 1]
        print(label)
        output_fp = os.path.join(cur_dir, 'data', 'matrix_data', 'for_knn',
                                 'freq_histogram', label + '.csv')

        # `with` guarantees the handle is closed even if a helper below raises.
        with open(output_fp, 'a') as fw:
            for file_name in os.listdir(addr_dir):
                if not file_name.endswith('.csv'):
                    continue
                fp = os.path.join(addr_dir, file_name)
                # Subject id: last two characters before the first dot.
                subject_id = file_name.split('.')[0][-2:]
                change_dt = get_change_date(fp)
                complete, by_complete_dates = get_complete_days(fp, change_dt)
                if len(complete) < 30:
                    continue
                print('')
                print('subject id: ' + subject_id)
                if subject_id not in id_y:
                    continue
                # Deterministic prefix of the complete days, not a random sample.
                sample_days = complete[:NUM_DAYS]
                loc_freq = get_loc_freq(sample_days, by_complete_dates)
                loc_freq = merge_homes(loc_freq, subject_id)
                all_loc_freq = get_all_loc_freq(loc_freq)
                # Sort by key so the columns come out in the same order on every row.
                ordered = sorted(all_loc_freq.items(), key=lambda item: item[0])
                row = [str(freq) for _, freq in ordered]
                row.append(str(id_y[subject_id]))
                fw.write(','.join(row) + '\n')