예제 #1
0
    # advanced: 'fft','max3fftpeaks','mean_std_max3fftpeaks'
    
    # Feature-extraction mode to evaluate; the same literal string is passed
    # to getFeatures below (this list itself is not read afterwards in the
    # visible code — presumably kept as a record of the options tried).
    feats = ['mean_std_max3fftpeaks']
    
    # =============================================================================
    # CLASSIFIERS:
    # =============================================================================
    # XGBOOST params - max_depth, min_child_weight, gamma
    
#    clfs = [XGB(max_depth=x) for x in range(1,10)]+\
#            [XGB(min_child_weight=x) for x in range(1,10)]+\
#            [XGB(gamma=x) for x in np.linspace(0,1,10)]
    
    # data: (X, y, groups) triple loaded earlier (outside this fragment).
    X = data[0]
    # Keep only velocity/acceleration channels, then extract summary features.
    X = getRelevantData(X,'vel_acc')
    X_f = getFeatures(X,'mean_std_max3fftpeaks')
    y = data[1]
    groups = data[2]
    
    scores = []
    clf = AutoC()
    # Single group-aware 80/20 split so samples sharing a group id never
    # appear in both train and test.
    cv = GroupShuffleSplit(n_splits=1,test_size=0.2)
    for train_index, test_index in cv.split(X_f,y,groups):
        # Split data to train and test set
        # NOTE(review): the split indices are computed on X_f but applied to
        # the raw X — confirm the classifier is meant to train on raw signals
        # rather than the extracted features.
        X_train = X[train_index]
        y_train = y[train_index]

        X_test = X[test_index]
        y_test = y[test_index]
        
        clf.fit(X_train,y_train)
from sklearn.ensemble import RandomForestClassifier
# Fix: LogisticRegression was used below but never imported (NameError).
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import VotingClassifier
from own_functions import loadData, getRelevantData, getFeatures
from sklearn.model_selection import GridSearchCV
from os import getcwd
from sklearn.model_selection import GroupShuffleSplit

# Load the robot-surface dataset: (X, y, groups) triple.
folder = getcwd() + '/robotsurface/'
data = loadData(folder)

X = data[0]
y = data[1]
groups = data[2]

# Restrict to velocity/acceleration channels, then extract log10-FFT features.
X = getRelevantData(X, 'vel_acc')
X_feat = getFeatures(X, 'fftlog10')

# Soft-voting ensemble over three heterogeneous base classifiers.
clf1 = LogisticRegression(solver='lbfgs',
                          multi_class='multinomial',
                          random_state=1)
clf2 = RandomForestClassifier(random_state=1)
clf3 = GaussianNB()
eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],
                        voting='soft')

# Search grid: logistic-regression regularization strength and forest size.
params = {'lr__C': [1.0, 100.0], 'rf__n_estimators': [20, 200]}

# Group-aware CV: 30 random 80/20 splits that never put one group on both
# sides; `groups` is forwarded to the splitter via fit's third argument.
cv = GroupShuffleSplit(n_splits=30, test_size=0.2, random_state=0)
grid = GridSearchCV(estimator=eclf, param_grid=params, cv=cv)
grid = grid.fit(X_feat, y, groups)
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.model_selection import GroupShuffleSplit, cross_val_score
from own_functions import getFeatures, loadData, getRelevantData, printScores, makeSubmissionFile

from os import getcwd
import numpy as np
# Load the robot-surface dataset: (X, y, groups) triple.
folder = getcwd() + '/robotsurface/'
data = loadData(folder)
X = data[0]
y = data[1]
groups = data[2]
# Channel subset and feature-extraction mode (strings interpreted by
# own_functions helpers).
limit = 'vel_acc'
feature = 'fftlog10'
X = getRelevantData(X, limit)
# NOTE(review): the extra (False, 0) arguments' semantics are defined in
# own_functions.getFeatures — confirm what they toggle before changing.
X_feats = getFeatures(X, feature, False, 0)

# Random-forest hyperparameter search space for RandomizedSearchCV.
# NOTE(review): max_features='auto' was deprecated/removed in newer
# scikit-learn releases — verify against the pinned sklearn version.
params = {
    'bootstrap': [True, False],
    'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, None],
    'max_features': ['auto', 'sqrt'],
    'min_samples_leaf': [1, 2, 4],
    'min_samples_split': [2, 5, 10],
    'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000]
}

# 3 group-aware 80/20 splits; 100 random parameter draws. The n_estimators
# passed to RFC here is only the base-estimator default — sampled params
# override it during the search. clf is not fitted in this visible chunk.
cv = GroupShuffleSplit(n_splits=3, test_size=0.2, random_state=0)
clf = RandomizedSearchCV(RFC(n_jobs=-1, n_estimators=100),
                         params,
                         cv=cv,
                         n_iter=100)
예제 #4
0
from os import getcwd
import numpy as np
from own_functions import loadData, getRelevantData, getFeatures, getMaxpeaks
from sklearn.svm import LinearSVC
from sklearn.multiclass import OneVsRestClassifier
from xgboost import XGBClassifier as XGB
from xgboost import plot_importance
import matplotlib.pyplot as plt

# Load the dataset and fit a default XGBoost classifier on mean/std
# features extracted from all sensor channels.
folder = getcwd() + '/robotsurface/'
data = loadData(folder)

X = data[0]
X = getRelevantData(X, 'all')
X_f = getFeatures(X, 'mean_std')
y = data[1]
clf = XGB()
clf.fit(X_f, y)
# %%
# Visualize per-feature importance of the fitted booster.
plt.figure(figsize=(20, 10))
ax = plt.axes()
plot_importance(clf, ax)
plt.show()

# %% see feature order

# Manually recompute the feature components to map importance indices back
# to their source: FFT magnitude truncated to the first 63 bins, then
# per-channel mean/std reductions along the last axis.
# NOTE(review): assumes X is 3-D (samples, channels, time) — the axis-2
# reductions below rely on that; confirm against loadData's output.
fft = np.abs(np.fft.fft(X))[:, :, :63]
fftmean = np.expand_dims(np.mean(fft, 2), axis=2)
fftstd = np.expand_dims(np.std(fft, 2), axis=2)
mean = np.expand_dims(np.mean(X, 2), axis=2)