     'model__C': range_c, 'model__epsilon': range_e},
]
n_splits = 5
# cv = ShuffleSplit(n_splits=n_splits, test_size=0.2)   # alternative: random 80/20 splits
cv = KFold(n_splits=n_splits, shuffle=True)             # k-fold CV used for the grid search
score = 'neg_mean_absolute_error'

gscv = GridSearchCV(pipe, param_grid, cv=cv, scoring=score)
gscv.fit(X_train, y_train)
print_gscv_score(gscv)
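# print_gscv_score is assumed to be defined earlier in the full script; a minimal
# sketch of what it is expected to report (best parameters and best CV score):
def print_gscv_score(gscv):
    print('best parameters: {}'.format(gscv.best_params_))
    # with scoring='neg_mean_absolute_error', best_score_ is the negative MAE
    print('best CV score  : {:.4f}'.format(gscv.best_score_))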

y_pred = gscv.predict(X_train)
print('train data: ', end="")
print_score(y_train, y_pred)
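# print_score is likewise assumed to be defined earlier; a minimal sketch that
# reports the metrics discussed later in this script (MAE, RMSE and R^2):
def print_score(y_true, y_pred):
    import numpy as np
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)
    print('MAE = {:.3f}, RMSE = {:.3f}, R^2 = {:.3f}'.format(mae, rmse, r2))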
# visualize
fig = yyplot(y_train, y_pred)
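# yyplot is also assumed to be defined earlier in the full script; for reference,
# a minimal observed-vs-predicted (parity) plot could look like this sketch:
def yyplot(y_obs, y_pred):
    import numpy as np
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots(figsize=(5, 5))
    ax.scatter(y_obs, y_pred, alpha=0.5)
    lim = [min(np.min(y_obs), np.min(y_pred)), max(np.max(y_obs), np.max(y_pred))]
    ax.plot(lim, lim, 'k--')   # diagonal y = x reference line
    ax.set_xlabel('observed y')
    ax.set_ylabel('predicted y')
    ax.set_xlim(lim)
    ax.set_ylim(lim)
    return fig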

#%%
# Novelty detection by One Class SVM with optimized hyperparameter
clf = OneClassSVM(nu=0.003, kernel=gscv.best_params_['model__kernel'],
                  gamma=gscv.best_params_['model__gamma'])
clf.fit(X_train)

y_pred = gscv.predict(X_test)    # predicted y
reliability = clf.predict(X_test) # outliers = -1
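# OneClassSVM.predict returns +1 for samples inside the training domain and -1
# for outliers, so the fraction of +1 labels gives a rough coverage estimate:
n_in = int((reliability == 1).sum())
print('predictions inside the training domain: {} / {}'.format(n_in, len(reliability)))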

data = []             # rows of the prediction table written out below
output = 'test2.csv'
for i in range(len(X_test)):
    satom1 = periodic_table.get_el_sp(int(X_test[i][0]))  # element/species of the first atom
    satom2 = periodic_table.get_el_sp(int(X_test[i][1]))  # element/species of the second atom
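    # assumed completion of this loop: collect one row per test compound
    data.append([satom1, satom2, y_pred[i], reliability[i]])

# write the table to the file named above; the column names are an assumption,
# not taken from the original script (pandas may already be imported at the top)
import pandas as pd
pd.DataFrame(data, columns=['atom1', 'atom2', 'y_pred', 'reliability']).to_csv(output, index=False)
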
#%%
# references on model validation and the choice of n_splits (e.g. 2, 5 or 10):
# http://univprof.com/archives/16-06-12-3889388.html
# https://datachemeng.com/modelvalidation/
cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
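# a quick sanity check of the CV setting (sketch): cross-validated MAE of the
# base model 'mod' under the splitter chosen above
from sklearn.model_selection import cross_val_score
cv_mae = -cross_val_score(mod, X_train, y_train, cv=cv, scoring='neg_mean_absolute_error')
print('CV MAE: {:.3f} +/- {:.3f}'.format(cv_mae.mean(), cv_mae.std()))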

# for estimating performance on a different dataset, MAE is a suitable metric,
# whereas R^2 is not recommended:
# http://univprof.com/archives/16-07-04-4453136.html
rgr = GridSearchCV(mod, param_grid, cv=cv, scoring='neg_mean_absolute_error')
rgr.fit(X_train, y_train)
print_gscv_score(rgr)

y_pred = rgr.predict(X_train)
print('train data: ', end="")
print_score(y_train, y_pred)

# step 3. test
y_pred = rgr.predict(X_test)
print('test  data: ', end="")
print_score(y_test, y_pred)
print('{:.2f} seconds '.format(time() - start))

#%%

# step 4. visualize outputs
# yy-plot (train)
y_pred = rgr.predict(X_train)
fig = yyplot(y_train, y_pred)

# yy-plot (test)
y_pred = rgr.predict(X_test)
fig = yyplot(y_test, y_pred)
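
# optionally save the current (test-set) parity plot to a file; the file name is arbitrary
fig.savefig('yyplot_test.png', dpi=300)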