# Build a (subjects x patterns) occurrence matrix: count[i, j] is the number
# of sequences of subject i whose comma-joined form contains pattern j, then
# export the leading pattern columns as per-subject CSV features.
#
# Names read from the enclosing module (not defined in this block):
#   ids              -- sequence of subject identifiers; its length fixes n
#   freq_pat         -- sequence of (pattern, frequency) pairs; pattern is a
#                       string (it is tested with `in` and `.replace`)
#   seqs_by_subject  -- indexable by subject position; each entry is an
#                       iterable of sequences of strings
#   write_feature_to_csv(name, mapping) -- feature writer
n = len(ids)
m = len(freq_pat)
count = np.zeros((n, m))

for i in range(n):
    for seq in seqs_by_subject[i]:
        # Join once per sequence instead of once per (sequence, pattern).
        joined = ','.join(seq)
        # Debug aid: dump the daily sequences of the second subject only.
        if i == 1:
            print(joined)
        for j, (pat, _) in enumerate(freq_pat):
            # NOTE(review): this is a plain substring test on the joined
            # string, so a pattern may also match inside a longer token --
            # confirm that is the intended matching rule.
            if pat in joined:
                count[i, j] += 1

# Debug output. Single-argument print(...) calls behave identically under
# Python 2 and Python 3.
print(count)
print('%s %s' % (n, m))
# Guarded: with fewer than two subjects there is no row 1 to show.
if n > 1:
    print(count[1])
print(count[:, 0:7])

# Write the leading frequent patterns to csv, one feature file per pattern.
# NOTE(review): the original comment said "first 7" (matching the preview
# slice above) but the code exports only the first 2 -- confirm which is
# intended before changing the slice.
for j, (pat, _) in enumerate(freq_pat[:2]):
    # Column j of the matrix, keyed by subject identifier.
    id_feature = dict(zip(ids, count[:, j]))
    # Commas would clash with the CSV feature name, so use ';' instead.
    feature_name = "fp_test_" + pat.replace(',', ';')
    write_feature_to_csv(feature_name, id_feature)
if len(complete) < 30: continue print print 'subject id: ' + id sample_days = complete[:30] # seqs = per_subject_by_weekdays(fp, sample_days) # result = get_weekday_sum_avg_edit_dist(seqs) seqs = per_subject(fp, sample_days, by_complete_dates) #pprint(seqs) result = get_avg_edit_dist(seqs) id_feature[id] = result return id_feature if __name__ == '__main__': # fp = os.path.join(cur_dir, 'data', 'gps_osm', 'wifigps_addr_04.csv') # per_subject_by_weekdays(fp) #all_subjects_plot() id_edit_dist = get_feature() write_feature_to_csv('gps_edit_dist', id_edit_dist)
id_feature[id] = result return id_feature if __name__ == '__main__': # fp = r'data\by subjects\wifigps_subject08.csv' # get_avg_edit_dist(fp) id_edit_dist = get_feature() write_feature_to_csv('len_diff', id_edit_dist) #write_raw_feature_to_csv('len_diff', id_edit_dist)
id_feature[id] = result return id_feature if __name__ == '__main__': # fp = r'data\by subjects\wifigps_subject08.csv' # get_avg_edit_dist(fp) feature = get_feature() write_feature_to_csv('num_pattern', feature)