# Applicability Domain (inside: +1, outside: -1)
# iappd selects the AD estimator: 1 -> ad_knn, any other value -> ad_ocsvm.
iappd = 1
if iappd == 1:
    y_appd = ad_knn(X_train, X_test)
else:
    y_appd = ad_ocsvm(X_train, X_test)

# One row per test sample: formula label, first feature column (reported
# as 'P'), prediction truncated to int, and the AD label.
# Indexed access is kept on purpose: the container types of f_test and
# X_test are not visible in this part of the script.
data = [
    (f_test[i], int(X_test[i][0]), int(y_pred[i]), y_appd[i])
    for i in range(len(X_test))
]

properties = ['formula', 'P', 'Tc', 'AD']
df = pd.DataFrame(data, columns=properties)
df = df.sort_values('Tc', ascending=False)

# Only rows flagged as inside the applicability domain (AD == 1) are
# written; rows with any other AD value are dropped from the CSV.
df_in_ = df[df.AD == 1]
df_in_.to_csv(output, index=False)
print('Predicted Tc is written in file {}'.format(output))

#%%
# Validation toggle: flip to False to skip the two validation runs below.
niter = 10
if True:
    dcv_rgr(X_train, y_train, model, param_grid, niter)
    y_randamization_rgr(X_train, y_train, model, param_grid, niter)

# Elapsed time since `start` (set earlier in the script).
print('{:.2f} seconds '.format(time() - start))
# Apply the already-fitted scaler to the test features.
X_test = scaler.transform(X_test)

# Ridge regression; alpha is searched over 0.01, 0.02, ..., 0.70.
model = Ridge()
range_a = 0.01 * np.arange(1, 71, dtype=int)
param_grid = [{'alpha': range_a}]

# 5-fold shuffled K-fold is the splitter actually used. The original also
# assigned a ShuffleSplit to `cv` on the line before, but that value was
# overwritten before any use, so the dead assignment is removed.
cv = KFold(n_splits=5, shuffle=True)

gscv = GridSearchCV(model, param_grid, cv=cv)
gscv.fit(X_train, y_train)
print_gscv_score_rgr(gscv, X_train, X_test, y_train, y_test, cv)

# Predicted y
y_pred = gscv.predict(X_test)

# Applicability Domain (inside: +1, outside: -1)
y_appd = ad_knn(X_train, X_test)

# Side-by-side table: prediction, observation, AD label (one row per
# test sample).
results = np.c_[y_pred, y_test, y_appd]
columns = ['predicted y', 'observed y', 'AD']
df = pd.DataFrame(results, columns=columns)
print(df)

# Double cross-validation is disabled; flip to True to run it.
if False:
    dcv_rgr(X, y, model, param_grid, 10)

# NOTE(review): the source lost its line breaks; the elapsed-time print is
# placed outside the disabled branch so it always runs -- confirm.
print('{:.2f} seconds '.format(time() - start))
# Applicability Domain (inside: +1, outside: -1)
y_appd = ad_knn(X_train, X_test)


def _row_formula(row):
    """Return the reduced formula built from the first four columns of *row*.

    Columns 0 and 1 are passed (as ints) to periodic_table.get_el_sp and
    columns 2 and 3 are used as the matching integer counts.
    NOTE(review): presumably columns 0/1 hold atomic numbers -- confirm
    against the code that builds X_test.
    """
    satom1 = periodic_table.get_el_sp(int(row[0]))
    satom2 = periodic_table.get_el_sp(int(row[1]))
    natom1 = int(row[2])
    natom2 = int(row[3])
    str_mat = ''.join(str(part) for part in (satom1, natom1, satom2, natom2))
    return Composition(str_mat).reduced_formula


# One row per test sample: formula, column 4 (reported as 'P'), prediction
# truncated to int, and the AD label.
# Indexed access is kept on purpose: the container type of X_test is not
# visible in this part of the script.
data = [
    (_row_formula(X_test[i]), int(X_test[i][4]), int(y_pred[i]), y_appd[i])
    for i in range(len(X_test))
]

properties = ['formula', 'P', 'Tc', 'AD']
df = pd.DataFrame(data, columns=properties)
df = df.sort_values('Tc', ascending=False)

# Only rows flagged as inside the applicability domain (AD == 1) are
# written to the CSV.
output = 'test2_Tc_kNN_AD_DCV.csv'
df_in_ = df[df.AD == 1]
df_in_.to_csv(output, index=False)
print('Predicted Tc is written in file {}'.format(output))

# Validation toggle: flip to False to skip double cross-validation.
if True:
    param_grid = [{'n_neighbors': range_k}]
    dcv_rgr(X_train, y_train, model, param_grid, 10)

# Elapsed time since `start` (set earlier in the script).
print('{:.2f} seconds '.format(time() - start))
# NOTE(review): this chunk has lost its line breaks and opens inside an
# if/elif/else chain whose `if` header is not visible here (presumably a
# test on iappd -- confirm). It is left untouched rather than re-indented
# on a guess. As written, everything after the first '#' on the physical
# line below is a comment to the interpreter.
# Visible steps, in order:
#   1. y_appd is set by ad_knn, ad_ocsvm (iappd == 2) or
#      ad_knn_list(X, X_pred, 10) (else branch).
#   2. Rows (f_pred[i], int(P_pred[i]), int(y_pred[i]), int(y_pred_db[i]))
#      are collected for every index of X_pred.
#   3. A DataFrame with columns formula / P / Tc(pred) / Tc(DB) is written
#      to `output` via df.to_csv(output, index=False); no sorting or AD
#      filtering is active (those statements are commented out).
#   4. dcv_rgr and y_randamization_rgr follow an if(False) guard.
#   5. The elapsed time since `start` is printed.
# y_appd is not part of the rows or columns in the visible code, so the
# AD labels do not reach the CSV here.
y_appd = ad_knn(X, X_pred) elif(iappd == 2): y_appd = ad_ocsvm(X, X_pred) else: y_appd = ad_knn_list(X, X_pred, 10) data = [] for i in range(len(X_pred)): # temp = (f_pred[i], int(P_pred[i]), int(y_pred[i]), int(y_pred_db[i]), y_appd[i]) temp = (f_pred[i], int(P_pred[i]), int(y_pred[i]), int(y_pred_db[i])) data.append(temp) # properties=['formula','P', 'Tc(pred)', 'Tc(DB)','AD'] properties=['formula','P', 'Tc(pred)', 'Tc(DB)'] df = pd.DataFrame(data, columns=properties) # df.sort_values('Tc', ascending=False, inplace=True) # df.to_csv(output, index=False) # df_in_ = df[df.AD == 1] # df_in_.to_csv(output, index=False) df.to_csv(output, index=False) print('Predicted Tc is written in file {}'.format(output)) #%% niter=10 if(False): dcv_rgr(X, y, model, param_grid, niter) y_randamization_rgr(X, y, model, param_grid, niter) print('{:.2f} seconds '.format(time() - start))