# Classifier names accepted by optimCurveFit's ``method_clsf`` argument.
_KNOWN_CLASSIFIERS = ('SVM', 'Logit', 'kNN', 'DTC', 'boosting', 'GNB', 'DL')
# Split modes accepted by ``NV_type`` when ``strategy == 'NV_data'``.
_KNOWN_NV_TYPES = ('fixV', 'NVequals')


def _sample_split(indices, num_train):
    """Randomly pick ``num_train`` of ``indices`` for training.

    Returns ``(train, test)`` as two lists. ``test`` holds every index that
    was not sampled, in its original order. ``random.sample`` raises
    ``ValueError`` when ``num_train`` exceeds ``len(indices)``.
    """
    # random.sample requires a real sequence on Python 3, so pass a list
    # rather than the NumPy array itself.
    train = random.sample(list(indices), num_train)
    chosen = set(train)  # O(1) membership instead of scanning the list
    test = [ind for ind in indices if ind not in chosen]
    return train, test


def _rounded_accuracies(res_pred, y_test):
    """Return ``(by_seg, by_categ)`` from mf.get_corr_ratio, 3 decimals each."""
    res_by_seg = mf.get_corr_ratio(res_pred=res_pred,
                                   y_test=y_test,
                                   type='by_seg')
    res_by_categ = mf.get_corr_ratio(res_pred=res_pred,
                                     y_test=y_test,
                                     type='by_categ')
    return (float(format(res_by_seg, '.3f')),
            float(format(res_by_categ, '.3f')))


def optimCurveFit(strategy, method_clsf, ratio=0.8, NV_type='NVequals'):
    """Run one load / segment / PCA / train / test experiment on 'mitdb'.

    The pipeline loads channel 0 of the 'mitdb' data set through
    ``mf.load_data``, keeps the first 600 s of every record, segments the
    records with ``mf.get_seg_data``, reduces every segment to 20 PCA
    components, trains the requested classifier to predict the segment ID
    and scores the predictions with ``mf.get_corr_ratio``.

    Args:
        strategy: Data selection / split mode.
            'all_data'    -- use every record and every segment.
            'allN_data'   -- use every record, keep only segments whose
                             marker (last character of the label) is 'N'.
            'NV_data'     -- use a fixed list of record indices, renumber
                             their IDs 0..k-1 and split 'N' and 'V' segments
                             per ID according to ``NV_type`` / ``ratio``.
            'combine_IDs' -- give every 4 consecutive records a shared ID.
            Any other value behaves like 'all_data'.
        method_clsf: One of 'SVM', 'Logit', 'kNN', 'DTC', 'boosting', 'GNB'
            or 'DL'.
        ratio: Only used with ``strategy == 'NV_data'``. With 'NVequals' it
            is the fraction of each ID's 'V' segments used for training;
            with 'fixV' the number of training 'N' segments is capped at
            ``ratio`` times the number of training 'V' segments.
        NV_type: 'fixV' or 'NVequals'; only used with 'NV_data'.

    Returns:
        ``[accuracy_V, accuracy_N, train_time]`` for 'NV_data', otherwise
        ``[accuracy, train_time]``. Accuracies are the 'by_seg' value of
        ``mf.get_corr_ratio`` rounded to three decimals; ``train_time`` is
        the difference of two ``Time()`` readings taken around training
        (prediction is not included).

    Raises:
        ValueError: If ``method_clsf`` is unknown, if ``NV_type`` is unknown
            while ``strategy == 'NV_data'``, or (from ``random.sample``) if
            more training segments are requested than an ID has.

    Side effects:
        'SVM' writes the fitted model to 'test_clf.pkl' in the working
        directory; 'combine_IDs' and 'DL' print debugging output.
    """
    # Fail fast on arguments that would otherwise only surface as an
    # unbound-local error after the data has been loaded and processed.
    if method_clsf not in _KNOWN_CLASSIFIERS:
        raise ValueError('unknown method_clsf %r; expected one of: %s' %
                         (method_clsf, ', '.join(_KNOWN_CLASSIFIERS)))
    if strategy == 'NV_data' and NV_type not in _KNOWN_NV_TYPES:
        raise ValueError('unknown NV_type %r; expected one of: %s' %
                         (NV_type, ', '.join(_KNOWN_NV_TYPES)))

    constrain_time = True

    # ------------------------------------------------------------------
    # Step 1: data input
    # ------------------------------------------------------------------
    data_set = 'mitdb'  # 'ecgiddb', 'mitdb'
    channel = 0
    records, IDs, fss, annss = mf.load_data(data_set, channel)
    fs = fss[0]  # the first record's sampling rate is used for all records

    records = np.array(records)
    IDs = np.array(IDs)
    annss = np.array(annss)

    # ------------------------------------------------------------------
    # Step 2: data selection
    # ------------------------------------------------------------------
    if strategy == 'NV_data':
        NV_inds = [6, 15, 18, 23, 24, 26, 29, 31, 33, 35, 39, 41, 42, 46]

        records = records[NV_inds, :]
        IDs = IDs[NV_inds]
        annss = annss[NV_inds, :]

        # Renumber the selected records 0..k-1; the split in step 6 looks
        # segments up by these consecutive IDs.
        for new_id in range(len(NV_inds)):
            IDs[new_id] = new_id
    elif strategy == 'combine_IDs':
        num_to_combine = 4
        print(IDs)

        # Every complete group of `num_to_combine` consecutive records
        # takes over the ID of the group's first record ...
        for group in range(len(records) // num_to_combine):
            start = group * num_to_combine
            IDs[start + 1:start + num_to_combine] = IDs[start]
        # ... and all IDs are then scaled down by the group size.
        # NOTE(review): relies on IDs being an integer array so that the
        # stored quotient is truncated -- confirm the dtype from mf.load_data.
        for i in range(len(IDs)):
            IDs[i] /= num_to_combine
    # 'all_data', 'allN_data' and any other value: no record selection.

    if constrain_time:
        look_time = 600.  # in s
        look_ind = int(look_time * fs)
        records = records[:, :look_ind]
        annss = annss[:, :look_ind]

    # NOTE(review): `recs` is not used again in this function; it is kept
    # in case constructing Rec objects is relied on -- confirm and remove.
    recs = [
        Rec(record, fs, rec_id, anns)
        for record, rec_id, anns in zip(records, IDs, annss)
    ]

    # ------------------------------------------------------------------
    # Step 3: data filtering (nothing to do here)
    # ------------------------------------------------------------------

    # ------------------------------------------------------------------
    # Step 4: data segmentation
    # ------------------------------------------------------------------
    USE_BIOSPPY_FILTERED = True
    sigs, labels_bySegs = mf.get_seg_data(records,
                                          IDs,
                                          fss,
                                          USE_BIOSPPY_FILTERED,
                                          annss=annss)
    sigs, labels_bySegs = np.array(sigs), np.array(labels_bySegs)

    if strategy == 'allN_data':
        # Keep only the segments whose marker is 'N'.
        N_masks = np.array([x[-1] for x in labels_bySegs]) == 'N'
        sigs = sigs[N_masks, :]
        labels_bySegs = labels_bySegs[N_masks]

    # Every label is parsed as "<integer ID><one-character marker>".
    IDs_bySegs = np.array([int(x[:-1]) for x in labels_bySegs])
    mrks_bySegs = np.array([x[-1] for x in labels_bySegs])

    segs = np.array([
        Seg(sig=sig, fs=fs, ID=seg_id, mrk=mrk)
        for sig, seg_id, mrk in zip(sigs, IDs_bySegs, mrks_bySegs)
    ])

    # ------------------------------------------------------------------
    # Step 5: feature extraction
    # ------------------------------------------------------------------
    X_all = []
    y_all = []
    method_feat = 'PCA'  # 'template_matching'

    if method_feat == 'PCA':
        feat_dim = 20
        pca = PCA(n_components=feat_dim)
        X_all = np.array([x.sig for x in segs])
        X_all = pca.fit(X_all).transform(X_all)

        # Store each segment's feature vector on the segment itself; the
        # 'NV_data' split below reads it back through `.feat`.
        for seg, feat in zip(segs, X_all):
            seg.feat = feat
        y_all = np.array([x.ID for x in segs])

    X_all = np.array(X_all)

    # ------------------------------------------------------------------
    # Step 6: data split
    # ------------------------------------------------------------------
    if strategy != 'NV_data':
        X_train, X_test, y_train, y_test = train_test_split(X_all,
                                                            y_all,
                                                            test_size=0.2,
                                                            random_state=42)
    else:
        X_train, X_test, y_train, y_test = [], [], [], []
        y_test_mrks = []
        for i in range(len(NV_inds)):
            of_this_id = IDs_bySegs == i
            curr_mrks = mrks_bySegs[of_this_id]
            curr_segs = segs[of_this_id]
            curr_IDs = IDs_bySegs[of_this_id]

            curr_inds_Vs = np.where(curr_mrks == 'V')[0]
            curr_inds_Ns = np.where(curr_mrks == 'N')[0]
            curr_num_Vs = len(curr_inds_Vs)
            curr_num_Ns = len(curr_inds_Ns)

            if NV_type == 'fixV':
                train_num_Vs = int(curr_num_Vs * .8)
                train_num_Ns = min(int(curr_num_Ns * .8),
                                   int(ratio * train_num_Vs))
            else:  # 'NVequals' (validated at the top of the function)
                train_num_Vs = int(curr_num_Vs * ratio)
                train_num_Ns = train_num_Vs

            # Sample the V indices first, then the N indices, so the random
            # draws happen in the same order as before.
            train_inds_Vs, test_inds_Vs = _sample_split(curr_inds_Vs,
                                                        train_num_Vs)
            train_inds_Ns, test_inds_Ns = _sample_split(curr_inds_Ns,
                                                        train_num_Ns)

            # Training data: this ID's V segments, then its N segments.
            for train_inds in (train_inds_Vs, train_inds_Ns):
                X_train.extend(
                    seg.feat.tolist() for seg in curr_segs[train_inds])
                y_train.extend(curr_IDs[train_inds])

            # Test data in the same order, remembering each marker so the
            # results can be reported separately for 'V' and 'N'.
            for test_inds in (test_inds_Vs, test_inds_Ns):
                X_test.extend(
                    seg.feat.tolist() for seg in curr_segs[test_inds])
                y_test.extend(curr_IDs[test_inds])
                y_test_mrks.extend(curr_mrks[test_inds])

    X_train, y_train = np.array(X_train), np.array(y_train)
    X_test, y_test = np.array(X_test), np.array(y_test)

    # ------------------------------------------------------------------
    # Step 7: model training (timed; prediction happens in step 8 so that
    # train_time does not include it)
    # ------------------------------------------------------------------
    time_before_training = Time()

    if method_clsf == 'DL':
        not_trained = True

        if not_trained:
            model = Sequential()
            model.add(
                Dense(feat_dim, activation='relu', input_shape=(feat_dim, )))

            num_categs = len(set(y_train))

            print('%s %s' % (y_train, num_categs))
            Y_train = np.array(np_utils.to_categorical(y_train, num_categs))

            model.add(Dense(num_categs, activation='softmax'))

            model.compile(loss='categorical_crossentropy',
                          optimizer='adam',
                          metrics=['accuracy'])

            model.fit(X_train,
                      Y_train,
                      validation_split=0.2,
                      batch_size=32,
                      nb_epoch=50,
                      verbose=0)
        else:
            model = keras.models.load_model('test_clf_DL.pkl')
    elif method_clsf == 'SVM':
        not_trained = True
        from sklearn.externals import joblib
        if not_trained:
            clf = svm.SVC(kernel='rbf', C=10., gamma=0.1)
            clf.fit(X_train, y_train)
            joblib.dump(clf, 'test_clf.pkl')
        else:
            clf = joblib.load('test_clf.pkl')
    else:
        if method_clsf == 'Logit':
            clf = LR(C=10.)
        elif method_clsf == 'kNN':
            clf = KNC()
        elif method_clsf == 'DTC':
            clf = DTC()
        elif method_clsf == 'boosting':
            clf = XGBC()
        else:  # 'GNB' (validated at the top of the function)
            clf = GNB()
        clf.fit(X_train, y_train)

    time_after_training = Time()

    # ------------------------------------------------------------------
    # Step 8: model testing
    # ------------------------------------------------------------------
    if method_clsf != 'DL':
        res_pred = clf.predict(X_test)
    else:
        res_pred = model.predict_classes(X_test)

    # ------------------------------------------------------------------
    # Step 9: result output
    # ------------------------------------------------------------------
    train_time = time_after_training - time_before_training

    print_res = False  # flip to True for a console summary while debugging
    if print_res:
        print('')
        print('Parameters:')
        print('strategy: %s' % (strategy, ))
        print('constrain_time: %s' % (constrain_time, ))
        print('ratio: %s' % (ratio, ))
        print('method_clsf: %s' % (method_clsf, ))

        print('Results:')
        print('Used time for training: %s' % (train_time, ))

    if strategy == 'NV_data':
        # Report the accuracy separately for the 'V' and 'N' test segments.
        y_test_mrks = np.array(y_test_mrks)

        is_V = y_test_mrks == 'V'
        accuBySeg_V = _rounded_accuracies(res_pred[is_V], y_test[is_V])[0]

        is_N = y_test_mrks == 'N'
        accuBySeg_N = _rounded_accuracies(res_pred[is_N], y_test[is_N])[0]

        return [accuBySeg_V, accuBySeg_N, train_time]

    return [_rounded_accuracies(res_pred, y_test)[0], train_time]
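# ---- Example no. 2 (original separator text: "Esempio n. 2" / "0") ----
# A separate snippet starts below; it is not part of optimCurveFit above.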
from sklearn.linear_model import LogisticRegression as LR
from sklearn.neighbors import KNeighborsClassifier as KNC
from xgboost import XGBClassifier as XGBC

from sklearn.decomposition import PCA
from sklearn.naive_bayes import GaussianNB as GNB

import pywt, csv

# Script-level setup: load records through mf.load_data, segment them with
# mf.get_seg_data, and declare the feature / classifier method names that
# the rest of the script (not visible here) is expected to iterate over.

#CHANNEL_IND = channels[ind_ds] #
data_set = 'ecgiddb' # 'ecgiddb', 'mitdb'
channel = 1
# NOTE(review): num_persons / record_time are passed straight to
# mf.load_data; presumably they limit the number of subjects and the seconds
# read per record -- confirm against mf.load_data.
records, labels, fss = mf.load_data(data_set, channel, num_persons=30, record_time=20)

# Forwarded as the fourth positional argument of mf.get_seg_data.
USE_BIOSPPY_FILTERED = True
# Two parallel results: the segments and one label per segment.
segs_all, labels_all = mf.get_seg_data(records, labels, fss, USE_BIOSPPY_FILTERED)

# Names of the feature-extraction methods to evaluate.
methods_feat = ['DL']
#ind_fts = 0
#method_feat = 'orig' #method_feats[ind_fts]

# Names of the classification methods to evaluate.
methods_clsf = ['Logit', 'GNB', 'kNN', 'SVC', 'boosting', 'DL']
#ind_cls = 0
#method_clsf = 'Logit' #methods_clsfy[ind_cls]

# Sizes of the two method lists above.
num_feats = len(methods_feat)
num_clsfy = len(methods_clsf)


# Array forms of the segments and their labels.
X_all, y_all = np.array(segs_all), np.array(labels_all)
    recs.append(curr_rec)
######################

######################
#TODO Step 3: Data filtering
######################

######################

######################
#TODO Step 4: Data segmentation
######################
USE_BIOSPPY_FILTERED = True
sigs, labels_bySegs = mf.get_seg_data(records,
                                      IDs,
                                      fss,
                                      USE_BIOSPPY_FILTERED,
                                      annss=annss)
sigs, labels_bySegs = np.array(sigs), np.array(labels_bySegs)
mrks_bySegs = np.array([x[-1] for x in labels_bySegs])

if strategy == 'allN_data':
    N_masks = (mrks_bySegs == 'N')
    sigs = sigs[N_masks, :]
    labels_bySegs = labels_bySegs[N_masks]

IDs_bySegs = [int(x[:-1]) for x in labels_bySegs]
mrks_bySegs = [x[-1] for x in labels_bySegs]
IDs_bySegs, mrks_bySegs = np.array(IDs_bySegs), np.array(mrks_bySegs)

segs = []