     'model__C': range_c, 'model__epsilon': range_e},
]

n_splits = 5
cv = ShuffleSplit(n_splits=n_splits, test_size=0.2)
cv = KFold(n_splits=n_splits, shuffle=True)  # overrides the ShuffleSplit above
score = 'neg_mean_absolute_error'

gscv = GridSearchCV(pipe, param_grid, cv=cv, scoring=score)
gscv.fit(X_train, y_train)
print_gscv_score(gscv)

y_pred = gscv.predict(X_train)
print('train data: ', end="")
print_score(y_train, y_pred)

# visualize
fig = yyplot(y_train, y_pred)

#%%
# Novelty detection by One-Class SVM with the optimized hyperparameters
clf = OneClassSVM(nu=0.003, kernel=gscv.best_params_['model__kernel'],
                  gamma=gscv.best_params_['model__gamma'])
clf.fit(X_train)

y_pred = gscv.predict(X_test)      # predicted y
reliability = clf.predict(X_test)  # outliers = -1

data = []
output = 'test2.csv'
for i in range(len(X_test)):
    satom1 = periodic_table.get_el_sp(int(X_test[i][0]))
    satom2 = periodic_table.get_el_sp(int(X_test[i][1]))
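#%%
# The helpers print_gscv_score, print_score, and yyplot are defined elsewhere in
# this script; the sketch below is only an assumption of their typical shape
# (scikit-learn metrics plus a matplotlib observed-vs-predicted plot), not the
# exact implementation used here.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def print_gscv_score(gscv):
    # report the best cross-validation score and parameters found by GridSearchCV
    print("Best CV score: {:.4f}".format(gscv.best_score_))
    print("Best parameters:", gscv.best_params_)

def print_score(y_true, y_pred):
    # RMSE, MAE and R^2 for the given split
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    print("RMSE = {:.3f}, MAE = {:.3f}, R^2 = {:.3f}".format(rmse, mae, r2))

def yyplot(y_true, y_pred):
    # observed-vs-predicted (yy) scatter plot with the diagonal as a guide
    fig, ax = plt.subplots(figsize=(5, 5))
    ax.scatter(y_true, y_pred, alpha=0.6)
    lims = [min(np.min(y_true), np.min(y_pred)),
            max(np.max(y_true), np.max(y_pred))]
    ax.plot(lims, lims, 'k--')
    ax.set_xlabel('y observed')
    ax.set_ylabel('y predicted')
    return fig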
# http://univprof.com/archives/16-06-12-3889388.html
# n_splits = 2, 5, 10
# https://datachemeng.com/modelvalidation/
cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)

# for comparing estimates across different datasets, MAE is suitable (R^2 is not)
# http://univprof.com/archives/16-07-04-4453136.html
rgr = GridSearchCV(mod, param_grid, cv=cv, scoring='neg_mean_absolute_error')
rgr.fit(X_train, y_train)
print_gscv_score(rgr)

y_pred = rgr.predict(X_train)
print('train data: ', end="")
print_score(y_train, y_pred)

# step 3. test
y_pred = rgr.predict(X_test)
print('test data: ', end="")
print_score(y_test, y_pred)

print('{:.2f} seconds '.format(time() - start))

#%%
# step 4. visualize outputs
# yy-plot (train)
y_pred = rgr.predict(X_train)
fig = yyplot(y_train, y_pred)

# yy-plot (test)
y_pred = rgr.predict(X_test)
fig = yyplot(y_test, y_pred)
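#%%
# 'mod' and 'param_grid' are defined earlier in this script; the grid search
# above only assumes an estimator plus a matching parameter grid. A minimal,
# hypothetical setup for an RBF-kernel SVR (the ranges below are placeholders,
# not the values actually used here) could look like this:
import numpy as np
from sklearn.svm import SVR

mod = SVR(kernel='rbf')
param_grid = {
    'C':       2.0 ** np.arange(-5, 11),   # hypothetical search range
    'gamma':   2.0 ** np.arange(-10, 1),   # hypothetical search range
    'epsilon': 2.0 ** np.arange(-10, 1),   # hypothetical search range
}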