# legacy scikit-learn API (pre-0.18); newer releases moved these to
# sklearn.model_selection (LeavePLabelOut became LeavePGroupsOut)
from sklearn.cross_validation import StratifiedKFold, LeavePLabelOut, cross_val_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import numpy as np
import xgboost as xgb

from matplotlib import pyplot as plt
from utils import *
from spp_00_load_data import load_features

nsubject = 1
# feat_select = ['stat']
feat_select = ['sp_entropy']
# feat_select = ['mfj']

# filename_tr = 'sp2016_feat_train_{0}_stat_20160915'.format(nsubject)
# Xt, y, aFeatNames, aFiles_tr, plabels, data_q = get_from_10_min(filename_tr)
X, y, aFeatNames, aFiles_tr, plabels, data_q = load_features(
    'train', nsubject, feat_select)

# drop feature columns with 50 or more NaN entries and keep the name list in sync
ind = np.sum(np.isnan(X), axis=0) < 50
X = X[:, ind]
aFeatNames = [s for i, s in enumerate(aFeatNames) if ind[i]]

print 'Subject: ', nsubject
print 'Original dataset'
print X.shape
print y.shape
print list(aFeatNames)
y = y.ravel()

# clf = xgb.XGBClassifier()
# parameters = {
#     'max_depth': range(2, 31, 5),
#     'gamma': [0, 2],
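
# The commented-out parameter grid above is truncated in this copy. As a hedged
# sketch (not part of the original script), a plain cross-validated baseline
# using the imports at the top could look like this; the fold count and scoring
# metric are assumptions, not taken from the original.
clf = xgb.XGBClassifier()
cv = StratifiedKFold(y, n_folds=5, shuffle=True, random_state=0)
scores = cross_val_score(clf, X, y, scoring='roc_auc', cv=cv)
print 'CV AUC: %.3f +/- %.3f' % (scores.mean(), scores.std())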

# Example 2
# (imports assumed from the sibling examples in this collection; PreprocessPipeline
#  and drop_data_quality_thr presumably come from utils)
from utils import *
from spp_00_load_data import load_features
# sall = ['stat', 'spectral', 'sp_entropy', 'mfj', 'corr']
# feat_select = [sall, sall, sall]

feat_select = [['stat'], ['stat'], ['stat']]

REMOVE_COVARIATE_SHIFT = True

pp = PreprocessPipeline(remove_outliers=True, standardize=True)

d_data_train = dict()
d_data_test = dict()
for i in range(0, 3):

    nsubject = i + 1

    XTRAIN, ytrain, aFeatNames_tr, aFiles_tr, plabels_tr, data_q_tr = load_features(
        'train', nsubject, feat_select[i])
    XTEST, ytest, aFeatNames_ts, aFiles_ts, plabels_ts, data_q_ts = load_features(
        'test', nsubject, feat_select[i])

    pp.fit(XTRAIN, XTEST, drop_nan=True)

    print 'Subject: ', nsubject
    print 'Original dataset'
    print XTRAIN.shape
    print ytrain.shape

    XTRAIN, ytrain, plabels_tr = drop_data_quality_thr(XTRAIN, ytrain,
                                                       plabels_tr, data_q_tr,
                                                       10)
    XTRAIN = pp.transform(XTRAIN)
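
    # Hedged sketch (not part of the original snippet): the per-subject dicts
    # declared above are presumably filled with the preprocessed arrays here.
    d_data_train[nsubject] = (XTRAIN, ytrain, plabels_tr)
    d_data_test[nsubject] = (pp.transform(XTEST), ytest, plabels_ts)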

# Example 3
import numpy as np
from matplotlib import pyplot as plt
import sys

from utils import *
from spp_00_load_data import load_features

nsubject = 1

# feat_select = ['stat']
# feat_select = ['spectral']
# feat_select = ['sp_entropy']
feat_select = ['mfj']
# feat_select = ['spectral', 'sp_entropy']

XTRAIN, ytrain, aFeatNames, aFiles_tr, plabels, data_q = load_features(
    'train', nsubject, feat_select)
XTEST, ytest, aFeatNames_ts, dummy4, dummy5, dummy3 = load_features(
    'test', nsubject, feat_select)

XTRAIN, ytrain, XTEST, aFeatNames, plabels, data_q = \
    preprocess_pipeline(XTRAIN, ytrain, XTEST, aFeatNames, plabels, data_q, verbose=True)

print 'Original dataset'
print 'TRAIN:', XTRAIN.shape
print 'ytrain', ytrain.shape

thr = 10
XTRAIN, ytrain, plabels = drop_data_quality_thr(XTRAIN, ytrain, plabels,
                                                data_q, thr)

print '\nRemoved data quality with threshold: ', thr
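
# Hedged sketch (not part of the original snippet): matplotlib is imported above,
# so a quick look at one feature split by class is a plausible next step. The
# feature index below is arbitrary, chosen only for illustration.
ifeat = 0
plt.hist(XTRAIN[ytrain.ravel() == 0, ifeat], bins=50, alpha=0.5, label='class 0')
plt.hist(XTRAIN[ytrain.ravel() == 1, ifeat], bins=50, alpha=0.5, label='class 1')
plt.legend()
plt.title(aFeatNames[ifeat])
plt.show()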

# Example 4
# (numpy is used below as np but not imported explicitly in the original; added here)
import numpy as np
from python.utils_learning import OutliersWinsorization
from sklearn.linear_model import lasso_stability_path
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from spp_00_load_data import load_features
from utils import *

nsubject = 2
bappend_test = False
# feat_select = ['stat']
feat_select = ['mfj', 'sp_entropy']
# feat_select = ['spectral', 'sp_entropy']

X_train, y_train, aFeatNames, aFiles_tr, p, data_q = load_features(
    'train', nsubject, feat_select)
# X_test, y_test, dummy1, aFiles_ts, dummy2, dummy3 = load_features('test', nsubject, feat_select)

X_test = X_train.copy()
y_test = y_train.copy()
aFiles_ts = aFiles_tr

ind = np.sum(np.isnan(X_train), axis=0) < 50
X_train = X_train[:, ind]
aFeatNames = [s for i, s in enumerate(aFeatNames) if ind[i]]
X_test = X_test[:, ind]

# drop rows that still contain NaNs
X_train, y_train, dummy4 = drop_nan(X_train, y_train, y_train.copy())
X_test, y_test, dummy5 = drop_nan(X_test, y_test, y_test.copy())

print 'Subject: ', nsubject
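
# Hedged sketch (not part of the original snippet): the imports above suggest
# winsorizing and standardizing the features and then running the lasso
# stability path to rank them. OutliersWinsorization is assumed to follow the
# usual scikit-learn fit/transform API; the settings below are arbitrary.
pre = Pipeline([('wins', OutliersWinsorization()),
                ('scale', StandardScaler())])
X_scaled = pre.fit_transform(X_train)

alpha_grid, scores_path = lasso_stability_path(X_scaled, y_train.ravel(),
                                               random_state=0)

# rank features by their maximum selection frequency along the path
stability = scores_path.max(axis=1)
for i in np.argsort(stability)[::-1][:10]:
    print aFeatNames[i], stability[i]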

# Example 5
# (imports assumed from the sibling examples; PreprocessPipeline, drop_data_quality_thr,
#  load_removed_features and remove_features_by_name presumably come from utils)
import numpy as np
from utils import *
from spp_00_load_data import load_features
def load_features_and_preprocess(nsubject,
                                 feat_select,
                                 settings,
                                 verbose=True):

    qthr = settings.qthr
    remove_covariates = settings.remove_covariate_shift
    remove_outliers = settings.remove_outliers
    standardize = settings.standardize
    drop_nan = settings.drop_nan

    # XTRAIN, ytrain, aFeatNames_tr, aFiles_tr, plabels_tr, data_q_tr = load_features('train', nsubject, feat_select)
    # XTEST, ytest, aFeatNames_ts, aFiles_ts, plabels_ts, data_q_ts = load_features('test', nsubject, feat_select)

    data_tr = load_features('train', nsubject, feat_select)
    data_ts = load_features('test', nsubject, feat_select)

    XTRAIN, ytrain, plabels_tr, data_q_tr = (data_tr['X'], data_tr['y'],
                                             data_tr['plabels'], data_tr['data_q'])
    XTEST, ytest, plabels_ts = data_ts['X'], data_ts['y'], data_ts['plabels']

    aFeatNames_tr = data_tr['aFeatNames']
    aFeatNames_ts = data_ts['aFeatNames']

    # load_features returns a dict with keys:
    #   'X', 'y', 'aFeatNames', 'aFiles', 'plabels', 'plabels_10min', 'data_q'

    if verbose:
        print '############ Subject: ', nsubject, ' ############'
        print ' -- Features: ', '_'.join(feat_select)

    pp = PreprocessPipeline(remove_outliers=remove_outliers,
                            standardize=standardize)
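    # fit the preprocessing (outlier handling / standardization) on both the
    # train and the test matrices so both are transformed consistently below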
    pp.fit(XTRAIN, XTEST)

    if verbose:
        print ' -- Original dataset'
        print 'TRAIN:', XTRAIN.shape
        print 'ytrain', ytrain.shape

    if drop_nan:
        XTRAIN, ytrain, plabels_tr, ind_nan_tr = drop_data_quality_thr(
            XTRAIN, ytrain, plabels_tr, data_q_tr, qthr)
        # keep an all-False mask aligned with the rows that were retained
        ind_nan_tr = ind_nan_tr[~ind_nan_tr]

    else:
        ind_nan_tr = np.any(np.isnan(XTRAIN), axis=1)

    # test rows containing NaNs are zero-filled (rather than dropped) so every
    # test segment keeps a row in XTEST
    ind_nan_ts = np.any(np.isnan(XTEST), axis=1)
    XTEST[ind_nan_ts] = 0
    ytest = ytest.ravel()
    plabels_tr = plabels_tr.ravel()
    plabels_ts = plabels_ts.ravel()
    # XTEST, ytest, plabels_ts, ind_nan_ts = drop_data_quality_thr(XTEST, ytest, plabels_ts, data_q_ts, qthr)

    if verbose:
        print ' -- Removed data quality with threshold: ', qthr
        print 'TRAIN :', XTRAIN.shape
        print 'ytrain:', ytrain.shape
        print 'XTEST :', XTEST.shape
        print 'ytest:', ytest.shape

    XTRAIN = pp.transform(XTRAIN)
    XTEST = pp.transform(XTEST)

    if remove_covariates:
        l_feat_remove = load_removed_features(nsubject, feat_select)
        # l_feat_remove_all = load_removed_features(nsubject, ['stat_spectral_sp_entropy_mfj_corr'])
        # l_feat_remove += l_feat_remove_all
        XTRAIN, aFeatNames_tr, ind_remove = remove_features_by_name(
            XTRAIN, aFeatNames_tr, l_feat_remove)
        XTEST, aFeatNames_ts, ind_remove = remove_features_by_name(
            XTEST, aFeatNames_ts, l_feat_remove)

        if verbose:
            print '-- Removed features with covariate shift: '
            print 'TRAIN :', XTRAIN.shape
            print 'XTEST :', XTEST.shape

    data_tr['X'] = XTRAIN
    data_tr['y'] = ytrain
    data_tr['aFeatNames'] = aFeatNames_tr
    data_tr['plabels'] = plabels_tr
    data_tr['ind_nan'] = ind_nan_tr

    data_ts['X'] = XTEST
    data_ts['y'] = ytest
    data_ts['plabels'] = plabels_ts
    data_ts['aFeatNames'] = aFeatNames_ts
    data_ts['ind_nan'] = ind_nan_ts
    # data_tr = [XTRAIN, ytrain, aFeatNames_tr, aFiles_tr, plabels_tr, data_q_tr, ind_nan_tr]
    # data_ts = [XTEST, ytest, aFeatNames_ts, aFiles_ts, plabels_ts, data_q_ts, ind_nan_ts]
    return data_tr, data_ts
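

# Hedged usage sketch (not part of the original module): `settings` is assumed
# to be a simple namespace carrying the attributes read at the top of the
# function above.
if __name__ == '__main__':
    from argparse import Namespace
    settings = Namespace(qthr=10, remove_covariate_shift=True,
                         remove_outliers=True, standardize=True, drop_nan=True)
    data_tr, data_ts = load_features_and_preprocess(1, ['sp_entropy'], settings)
    print 'TRAIN:', data_tr['X'].shape, ' TEST:', data_ts['X'].shape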