# Fit the exhaustive feature-subset search on (X, y) with a single job.
clf = Exhaustion(
    estimator,
    n_select=n_select,
    muti_grade=2,
    muti_index=[2, X.shape[1]],
    must_index=None,
    n_jobs=1,
    refit=True,
).fit(X, y)

# Resolve names for element 0 of each of the first ten ``score_ex`` entries.
leading_keys = [entry[0] for entry in clf.score_ex[:10]]
name_ = name_to_name(
    X_frame.columns.values,
    search=leading_keys,
    search_which=0,
    return_which=(1,),
    two_layer=True,
)
sc = np.array(clf.scatter)

# Report element 1 of every ``score_ex`` entry, then the resolved names.
for entry in clf.score_ex:
    print(entry[1])
for resolved in name_:
    print(resolved)

# Plot the reference target against the prediction of the fitted search.
t = clf.predict(X)
p = BasePlot()
p.scatter(y, t, strx='True $E_{gap}$', stry='Calculated $E_{gap}$')
plt.show()

# Plot the first column of ``sc`` against its second column.
p.scatter(sc[:, 0], sc[:, 1], strx='Number', stry='Score')
plt.show()

# Both outputs share one name: ``method_name`` followed by str() of each
# item of ``n_select``.
file_stem = method_name + "".join(map(str, n_select))
store.to_csv(sc, file_stem)
store.to_pkl_pd(clf.score_ex, file_stem)
# Names of the regression setups handed to ``method_pack``.
method_all = [
    'SVR-set',
    "GPR-set",
    "RFR-em",
    "AdaBR-em",
    "DTR-em",
    "LASSO-L1",
    "BRR-L1",
]
methods = method_pack(method_all=method_all, me="reg", gd=True)

rmse_metric = "neg_root_mean_squared_error"
pre_y = []
ests = []
for name, search in zip(method_all, methods):
    # Override the search settings before fitting on (x_, y_).
    search.cv = 5
    search.scoring = rmse_metric
    fitted = search.fit(X=x_, y=y_)
    est = fitted.best_estimator_
    print(name, "neg_root_mean_squared_error", fitted.best_score_)

    # Mean r2 of the selected estimator under cross_val_score's default CV.
    fold_scores = cross_val_score(est, X=x_, y=y_, scoring="r2")
    print(name, "r2", fold_scores.mean())

    # Keep the prediction on ``x`` and the estimator itself, and persist
    # the estimator under the method name.
    pre_y.append(est.predict(x))
    ests.append(est)
    store.to_pkl_pd(est, name)

# One column per method plus a final column holding ``y``.
pre_y.append(y)
pre_y = pd.DataFrame(np.array(pre_y).T, columns=method_all + ["realy_y"])
store.to_csv(pre_y, "wrtem_result")
+ [np.array([0, 0, 0, 1, 0, 0, 0])] * 2 + [np.array([0, 0, 0, 0, 0, 0, 0])] + [
    np.array([0, 3, 0, 0, 0, 0, 0])] \
    + [np.array([0, 0, 0, 0, 0, 0, 0])] + [np.array([0, 3, 0, 0, 0, -1, 0])] + [
    np.array([1, -3, 0, 0, 0, 0, 0])] \
    + [np.array([1, -1, -2, 0, 0, 0, 0])]
    # NOTE(review): the lines above are the tail of an expression that starts
    # before this excerpt; the function header is not visible here.
    # Append seven more 7-component integer vectors to ``dims``.
    # Presumably each vector holds exponents over seven base units -- TODO confirm.
    dims.extend([np.array([0, 1, 0, 0, 0, 0, 0]), np.array([0, 2, 0, 0, 0, 0, 0]),
                 np.array([0, 1, 0, 0, 0, 0, 0]), np.array([0, 2, 0, 0, 0, 0, 0]),
                 np.array([1, -3, 0, 0, 0, 0, 0]), np.array([0, 3, 0, 0, 0, 0, 0]),
                 np.array([0, 0, 0, 0, 0, 0, 0])
                 ])
    return dims


# Build the two lookup objects and hand each to ``store.to_pkl_pd`` under a
# matching name (presumably pickled by the project's store helper -- confirm).
name_and_abbr = get_abbr()
dims = get_dim()
store.to_pkl_pd(dims, "dims")
store.to_pkl_pd(name_and_abbr, "name_and_abbr")
# Positional slice: keep rows from position 5 on and columns from position 7 on.
element_table = element_table.iloc[5:, 7:]
# Feature names to select; the list continues past the end of this excerpt.
feature_select = [
    'lattice constants a',
    'lattice constants b',
    'lattice constants c',
    'radii atomic(empirical)',
    'radii atomic(clementi)',
    'radii ionic(pauling)',
    'radii ionic(shannon)',
    'radii covalent',
    'radii covalent 2',
    'radii metal(waber)',
    'distance valence electron(schubert)',
# NOTE(review): ``me1``, ``cv1``, ``scoring1`` and ``param_grid1`` are defined
# above this excerpt; this call may belong to a loop whose header is not
# visible here -- confirm its indentation against the full file.
estimator_all.append(
    GridSearchCV(me1, cv=cv1, scoring=scoring1, param_grid=param_grid1, n_jobs=1)
)

# union: take element 0 of the first ten entries of every list in
# ``index_all``, as tuples, and drop duplicates.
index_all = list(
    {tuple(entry[0]) for ranked in index_all for entry in ranked[:10]}
)

# get x_name and abbr
index_all_name = name_to_name(
    X_frame.columns.values,
    search=list(index_all),
    search_which=0,
    return_which=(1,),
    two_layer=True,
)
# Remove every "_<digit>" occurrence from each name, then keep the distinct
# names of each group in sorted order.
index_all_name = [
    sorted({re.sub(r"_\d", "", label) for label in labels})
    for labels in index_all_name
]
index_all_abbr = name_to_name(
    name_init,
    abbr_init,
    search=index_all_name,
    search_which=1,
    return_which=2,
    two_layer=True,
)

# Persist the index tuples together with their names and abbreviations.
store.to_pkl_pd(index_all, "index_all")
store.to_csv(index_all_name, "index_all_name")
store.to_csv(index_all_abbr, "index_all_abbr")

# Fit UGS on the collected estimators and index tuples.
ugs = UGS(estimator_all, index_all, estimator_n=[2, 3], n_jobs=3)
ugs.fit(X, y)
# re = gs.cv_score_all(index_all)
# The distances computed here are reused by the grouping and the cluster
# print-out below.
binary_distance = ugs.cal_binary_distance_all(index_all, estimator_i=3)
# slice_k = gs._cv_predict_all(estimator_i=3)
groups = ugs.cal_group(
    estimator_i=3,
    printing=True,
    print_noise=0.2,
    pre_binary_distance_all=binary_distance,
)
ugs.cluster_print(binary_distance, highlight=[1, 2, 3])
# groups = ugs.cal_t_group(printing=False, pre_group=None)
# ss=ugs.select_ugs(alpha=0.01)
# results = gs.select_gs(alpha=0.01)
# gs.cal_group(eps=0.10, estimator_i=1, printing=True, pre_binary_distance_all=slice_g, print_noise=0.1,