def remove_by_y(self, y_):
    """Keep one feature per group: the member with the highest score against ``y_``.

    The raw scores are ``self.cov_y(self.data, y_)`` passed through
    ``self.feature_fold``; the groups are the lists in ``self.list_count``.

    Parameters
    ----------
    y_ :
        Target values, forwarded unchanged to ``self.cov_y``.

    Returns
    -------
    list
        Sorted, de-duplicated selection containing one winner per group.
    """
    raw_scores = self.cov_y(self.data, y_)
    # NOTE(review): ``self`` is passed explicitly to an attribute looked up on
    # ``self``; kept exactly as in the original call -- confirm that
    # ``feature_fold`` really expects it.
    folded_scores = self.feature_fold(self, raw_scores)
    groups = self.list_count

    # Presumably maps every group to the scores of its members (one sequence
    # per group) -- verify against name_to_name.
    group_scores = name_to_name(folded_scores, search=groups, search_which=0,
                                return_which=(1, ), two_layer=True)

    # For each group, take the member sitting at the position of the top score.
    winners = {members[np.argmax(scores)]
               for scores, members in zip(group_scores, groups)}
    return sorted(winners)
scoring=scoring1, param_grid=param_grid1, n_jobs=1) # n_select = [1,] n_select = (2, 3) clf = Exhaustion(estimator, n_select=n_select, muti_grade=2, muti_index=[2, X.shape[1]], must_index=None, n_jobs=1, refit=True).fit(X, y) name_ = name_to_name(X_frame.columns.values, search=[i[0] for i in clf.score_ex[:10]], search_which=0, return_which=(1, ), two_layer=True) sc = np.array(clf.scatter) for i in clf.score_ex[:]: print(i[1]) for i in name_: print(i) t = clf.predict(X) p = BasePlot() p.scatter(y, t, strx='True $E_{gap}$', stry='Calculated $E_{gap}$') plt.show() p.scatter(sc[:, 0], sc[:, 1], strx='Number', stry='Score') plt.show()
method_name = ['GPR-set', 'SVR-set', 'KRR-set', 'KNR-set', 'GBR-em', 'AdaBR-em', 'RFR-em', "DTR-em"]
index_all = [data.pickle_pd.GPR_set23, data.pickle_pd.SVR_set23, data.pickle_pd.KRR_set23]

# One grid-search wrapper per entry of method_name, in the same order.
estimator_all = []
for i in method_name:
    me1, cv1, scoring1, param_grid1 = dict_method_reg()[i]
    estimator_all.append(GridSearchCV(me1, cv=cv1, scoring=scoring1, param_grid=param_grid1, n_jobs=1))

# union
# [print(_[0]) for _ in index_all]
# Element 0 of each of the first five entries of every stored result, as a
# hashable tuple, with duplicates across results removed.
index_slice = [tuple(entry[0]) for result in index_all for entry in result[:5]]
index_slice = list(set(index_slice))

# get x_name and abbr
index_all_name = name_to_name(X_frame.columns.values, search=list(index_slice), search_which=0,
                              return_which=(1,), two_layer=True)
# Drop every "_<digit>" occurrence from the names, de-duplicate each group and
# sort it. sorted() replaces the former list(set(...)) followed by a
# comprehension that was run only for the side effect of .sort().
index_all_name = [sorted({re.sub(r"_\d", "", name) for name in names}) for names in index_all_name]
index_all_abbr = name_to_name(name_init, abbr_init, search=index_all_name, search_which=1,
                              return_which=2, two_layer=True)

parto = []
table = []
for i in range(2):
    print(i)
    X = X_frame.values
    y = y_frame.values
    scal = preprocessing.MinMaxScaler()
if __name__ == '__main__':
    import pandas as pd

    # Project helpers pointed at hard-coded local Windows directories.
    store = Store(r'C:\Users\Administrator\Desktop\band_gap_exp\4.symbol')
    data = Call(r'C:\Users\Administrator\Desktop\band_gap_exp')
    data_import = data.csv().all_import
    name_init, abbr_init = data.pickle_pd().name_and_abbr

    # NOTE(review): 'destiny' looks like a misspelling of "density", but it is
    # a lookup key and is therefore kept verbatim -- confirm against the data.
    select = [
        'destiny',
        'valence electron number',
        'distance core electron(schubert)',
    ]
    X_frame_abbr = name_to_name(name_init, abbr_init, search=select,
                                search_which=1, return_which=2, two_layer=False)

    # Every entry after the first is expanded into a "_0" and a "_1" variant;
    # the first entry stays as a single column.
    select = ['destiny'] + [name + "_%i" % k for name in select[1:] for k in range(2)]
    select_abbr = ['$\\rho_c$'] + [abbr + "_%i" % k for abbr in X_frame_abbr[1:] for k in range(2)]

    # Rows with group_number 216, rows with group_number 225, and the two
    # subsets stacked in that order.
    rows_216 = np.where(data_import['group_number'] == 216)[0]
    data216_import = data_import.iloc[rows_216]
    rows_225 = np.where(data_import['group_number'] == 225)[0]
    data225_import = data_import.iloc[rows_225]
    data216_225_import = pd.concat((data216_import, data225_import))
y = y_frame.values

# calculate corr
# NOTE(review): the upper bound of muti_index is len(X), i.e. the length of
# X's first axis -- confirm that this is the intended bound.
corr = Corr(threshold=0.90, muti_grade=2, muti_index=[3, len(X)])
corr.fit(X_frame)
cof_list = corr.count_cof()

# get x_name and abbr
# Column names kept by corr.transform, with every "_0" occurrence removed.
X_frame_name = [name.replace("_0", "") for name in corr.transform(X_frame.columns.values)]
X_frame_abbr = name_to_name(
    name_and_abbr.columns.values,
    list(name_and_abbr.iloc[0, :]),
    search=X_frame_name,
    search_which=1,
    return_which=[2, ],
    two_layer=False,
)

# rename
# cov = pd.DataFrame(corr.cov_shrink)
# cov = cov.set_axis(X_frame_abbr, axis='index', inplace=False)
# cov = cov.set_axis(X_frame_abbr, axis='columns', inplace=False)
#
# fig = plt.figure()
# fig.add_subplot(111)
# sns.heatmap(cov, vmin=-1, vmax=1, cmap="bwr", linewidths=0.3, xticklabels=True, yticklabels=True, square=True,
# annot=True, annot_kws={'size': 3})
# plt.show()
# corr_plot(corr.cov_shrink,
X_frame = data225_import.drop(['exp_gap', 'group_number'], axis=1) y_frame = data225_import['exp_gap'] X = X_frame.values y = y_frame.values """calculate corr""" corr = Corr(threshold=0.90, muti_grade=2, muti_index=[2, len(X)]) corr.fit(X_frame) cof_list = corr.count_cof() """get x_name and abbr""" X_frame_name = corr.transform(X_frame.columns.values) X_frame_name = [i.replace("_0", "") for i in X_frame_name] X_frame_abbr = name_to_name(name_init, abbr_init, search=X_frame_name, search_which=1, return_which=2, two_layer=False) """rename""" cov = pd.DataFrame(corr.cov_shrink) cov = cov.set_axis(X_frame_abbr, axis='index', inplace=False) cov = cov.set_axis(X_frame_abbr, axis='columns', inplace=False) fig = plt.figure() fig.add_subplot(111) sns.heatmap(cov, vmin=-1, vmax=1, cmap="bwr", linewidths=0.3, xticklabels=True, yticklabels=True, square=True, annot=True, annot_kws={'size': 3}) plt.show() corr_plot(corr.cov_shrink, X_frame_abbr, left_down="fill", right_top="pie", threshold_right=0, front_raito=0.5) list_name, list_abbr = name_to_name(X_frame_name, X_frame_abbr, search=corr.list_count, search_which=0, return_which=(1, 2),