# Load 'tc_train.csv' as a comma-separated table: the first column becomes
# the index and the first row supplies the column names.
# NOTE(review): this line appears to have lost its leading indentation
# relative to the statements below — confirm against the original file.
data = pd.read_csv(filepath_or_buffer='tc_train.csv',
                       index_col=0,
                       header=0,
                       sep=',')
    # Remove the 'formula' column in place, then print summary statistics
    # of the remaining columns.
    data.drop(['formula'], axis=1, inplace=True)
    print(data.describe())

#%%
# Set the parameters by cross-validation

n_splits = 3
# Choose the splitter explicitly instead of building a ShuffleSplit that is
# immediately overwritten:
#   icv == 1 -> KFold with shuffling (the splitter that was effectively used)
#   otherwise -> ShuffleSplit holding out 30% of the samples per split
icv = 1
if icv == 1:
    cv = KFold(n_splits=n_splits, shuffle=True)
else:
    cv = ShuffleSplit(n_splits=n_splits, test_size=0.3)

# Grid-search `param_grid` for `model` with the chosen splitter, then report
# the scores through the print_gscv_score_rgr helper.
gscv = GridSearchCV(model, param_grid, cv=cv)
gscv.fit(X_train, y_train)
print_gscv_score_rgr(gscv, X_train, X_test, y_train, y_test, cv)

#%%
# Predict the targets of X_test with the fitted grid search.
y_pred = gscv.predict(X_test)

# Applicability Domain (inside: +1, outside: -1)
# iappd == 1 routes through ad_knn; any other value routes through ad_ocsvm.
iappd = 1
y_appd = (ad_knn(X_train, X_test) if iappd == 1
          else ad_ocsvm(X_train, X_test))
# Build one record per row of X_test:
# (f_test entry, first feature cast to int, prediction cast to int, y_appd entry).
data = []
for i in range(len(X_test)):
    # NOTE(review): `temp` is built but never stored in the visible lines;
    # the statement that adds it to `data` appears to be missing — confirm.
    temp = (f_test[i], int(X_test[i][0]), int(y_pred[i]), y_appd[i])
Exemple #2
0
    # Draw a matrix of pairwise scatter plots for `data` and display the figure.
    pd.plotting.scatter_matrix(data)
    plt.show()

#%%
# Tune the model's parameters with a cross-validated grid search.

n_splits = 3
# icv == 1 -> KFold with shuffling; any other value -> ShuffleSplit.
icv = 1
cv = (KFold(n_splits=n_splits, shuffle=True) if icv == 1
      else ShuffleSplit(n_splits=n_splits))

# Search `param_grid` for `model` on the training data and report the scores.
gscv = GridSearchCV(model, param_grid, cv=cv)
gscv.fit(X_train, y_train)
print_gscv_score_rgr(gscv, X_train, X_test, y_train, y_test, cv)

# Same report for X / X_pred with y / y_pred_db.
print_gscv_score_rgr(gscv, X, X_pred, y, y_pred_db, cv)

# Take the estimator selected by the grid search and fit it on X, y.
best = gscv.best_estimator_
best.fit(X, y)

# Feature importances are only printed for the 'RF' and 'GB' model keys
# (presumably the tree-ensemble models — confirm against the model table).
if key in ('RF', 'GB'):
    f_impo = best.feature_importances_
    print(f_impo)

#%%
# Prediction test: disabled by default; set ltest to True to predict on X_pred.
ltest = False
if ltest:
    y_pred = gscv.predict(X_pred)