Esempio n. 1
0
def get_feature():
    """Build a per-subject feature from the subject files found in ``wifi_dir``.

    Every entry of ``wifi_dir`` whose name ends with ``.csv`` (but not with
    ``datetime.csv``) is treated as one subject.  The subject id is taken
    from the last two characters of the file name before its first dot.
    For each subject the sequences returned by ``get_seqs`` are passed to
    ``get_len_var``; the id and the result are printed as progress output.

    Returns:
        dict mapping each subject id to the value returned by
        ``get_len_var`` for that subject.
    """
    features_by_subject = {}

    for entry_name in os.listdir(wifi_dir):
        is_subject_csv = (
            entry_name.endswith('.csv')
            and not entry_name.endswith('datetime.csv')
        )
        if not is_subject_csv:
            continue

        base_name = entry_name.split('.')[0]
        subject_id = base_name[-2:]

        # NOTE: filtering subjects against REMOVE_SUBJECTS is currently
        # disabled; every subject file found is processed.

        print('----------')
        print('id: ' + subject_id)

        subject_feature = get_len_var(get_seqs(subject_id))
        print(subject_feature)

        features_by_subject[subject_id] = subject_feature

    return features_by_subject
def get_freq_pattern(min_support, typed, normalize):
    """Mine frequent patterns over all subjects and write one feature per pattern.

    Steps:
      1. Collect the sequences of every subject file in ``wifi_dir`` (names
         ending in ``.csv`` but not ``datetime.csv``); when ``typed`` is
         true they are first converted with ``to_loc_type``.
      2. Call ``gsp`` for pattern levels 1 through 6 on the pooled
         sequences, handing each level's result to the next level.
      3. For every subject, count how many of its sequences contain each
         frequent pattern as a contiguous run of whole items.
      4. Write each pattern's per-subject counts with
         ``write_feature_to_csv`` under the feature name
         ``"fp_" + ";".join(pattern)``.

    Args:
        min_support: forwarded to ``gsp``; also formatted with ``%d`` into
            the output directory name (``support<N>``).
        typed: when true, sequences go through ``to_loc_type`` and output
            is written under ``freq_pat/typed/...``.
        normalize: only consulted when ``typed`` is false; selects the
            ``freq_pat/normalized/...`` directory and passes ``True`` as
            the last argument of ``write_feature_to_csv``.

    Returns:
        None.  Results are printed and written through
        ``write_feature_to_csv``.
    """
    ### get seqs
    ids = []
    all_seqs = []  # all seqs of all subjects
    seqs_by_subject = []  # one entry per subject, aligned with ``ids``
    for file_name in os.listdir(wifi_dir):
        if not file_name.endswith(".csv") or file_name.endswith("datetime.csv"):
            continue
        subject_id = file_name.split(".")[0][-2:]
        ids.append(subject_id)
        seqs = get_seqs(subject_id)
        if typed:
            seqs = to_loc_type(seqs)
        all_seqs.extend(seqs)
        seqs_by_subject.append(seqs)

    ### get freq_patterns from all_seqs
    freq_patterns = []
    max_pattern_len = 6
    prev_level_freq = []
    for level in range(1, max_pattern_len + 1):
        freq = gsp(all_seqs, level, prev_level_freq, min_support, typed)
        freq_patterns.extend(freq)
        prev_level_freq = freq
    pp.pprint(freq_patterns)
    print(len(freq_patterns))

    ### compute frequency of freq_patterns for each subject
    n = len(ids)  # n subjects
    m = len(freq_patterns)  # m frequent patterns
    count = np.zeros((n, m))  # n x m matrix

    # Wrap patterns and sequences in delimiters so that only whole items can
    # match.  A bare substring test on the joined strings would let the
    # pattern ["1"] match the sequence ["11", "2"].
    pattern_keys = ["," + ",".join(pat) + "," for pat in freq_patterns]
    for i, subject_seqs in enumerate(seqs_by_subject):
        for seq in subject_seqs:
            seq_key = "," + ",".join(seq) + ","
            for j, pattern_key in enumerate(pattern_keys):
                if pattern_key in seq_key:
                    count[i, j] += 1
    print(np.sum(count, axis=0))

    ### use frequency as feature, write to csv
    support_dir = "support%d" % min_support
    if typed:
        out_dir = os.path.join("freq_pat", "typed", support_dir)
        normalize_flag = False
    elif normalize:
        out_dir = os.path.join("freq_pat", "normalized", support_dir)
        normalize_flag = True
    else:
        out_dir = os.path.join("freq_pat", support_dir)
        normalize_flag = False

    for j, pat in enumerate(freq_patterns):
        # Column j holds this pattern's count for every subject, in ``ids`` order.
        id_feature = dict(zip(ids, count[:, j]))
        feature_name = "fp_" + ";".join(pat)
        write_feature_to_csv(id_feature, feature_name, out_dir, normalize_flag)