def to_csv(self, data_all):
    """store to csv"""
    if self.store:
        if isinstance(self.store, str):
            path = self.store
        else:
            path = os.getcwd()
        file_new_name = "_".join((str(self.pop), str(self.gen),
                                  str(self.mutate_prob), str(self.mate_prob),
                                  str(time.time())))
        try:
            st = Store(path)
            st.to_csv(data_all, file_new_name, transposition=True)
            print("store data to ", path, file_new_name)
        except (IOError, PermissionError):
            st = Store(os.getcwd())
            st.to_csv(data_all, file_new_name, transposition=True)
            print("store data to ", os.getcwd(), file_new_name)
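# A minimal, stdlib-only sketch of the same pattern used by to_csv above:
# write a timestamped csv to a target directory and fall back to the current
# working directory on permission problems. The helper name `save_rows_csv`
# and the sample rows are hypothetical, not part of the Store API.
import csv
import os
import time


def save_rows_csv(rows, directory):
    """Write rows to a timestamped csv, falling back to the cwd on failure."""
    file_name = "result_{}.csv".format(time.time())
    try:
        target = os.path.join(directory, file_name)
        with open(target, "w", newline="") as f:
            csv.writer(f).writerows(rows)
    except (IOError, PermissionError):
        target = os.path.join(os.getcwd(), file_name)
        with open(target, "w", newline="") as f:
            csv.writer(f).writerows(rows)
    print("store data to", target)


# save_rows_csv([("gen", "score"), (1, 0.93)], "/tmp")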
def eaSimple(population, toolbox, cxpb, mutpb, ngen, stats=None,
             halloffame=None, verbose=__debug__, pset=None, store=True):
    """
    Parameters
    ----------
    population : list
        List of individuals to evolve.
    toolbox : Toolbox
        Toolbox containing the evolution operators (selection, parallel evaluation, ...).
    cxpb : float
        Probability of mating two individuals.
    mutpb : float
        Probability of mutating an individual.
    ngen : int
        Number of generations.
    stats : Statistics, optional
        Object used to compile the per-generation record.
    halloffame : HallOfFame, optional
        Container keeping the best individuals found so far.
    verbose : bool
        Whether to print the logbook stream each generation.
    pset : PrimitiveSet, optional
        Primitive set used to build the expressions.
    store : bool
        Whether to store the per-generation data.

    Returns
    -------
    population, logbook
    """
    rst = random.getstate()
    len_pop = len(population)
    logbook = Logbook()
    logbook.header = [] + (stats.fields if stats else [])
    data_all = {}
    random.setstate(rst)

    for gen in range(1, ngen + 1):
        """evaluation"""
        rst = random.getstate()

        """score"""
        invalid_ind = [ind for ind in population if not ind.fitness.valid]
        fitnesses = toolbox.parallel(iterable=population)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit[0],
            ind.expr = fit[1]
            ind.y_dim = fit[2]
            ind.withdim = fit[3]
        random.setstate(rst)

        rst = random.getstate()
        """elite"""
        add_ind = []
        add_ind1 = toolbox.select_kbest_target_dim(population, K_best=0.05 * len_pop)
        add_ind += add_ind1
        elite_size = len(add_ind)
        random.setstate(rst)

        rst = random.getstate()
        """score"""
        random.setstate(rst)

        rst = random.getstate()
        """record"""
        if halloffame is not None:
            halloffame.update(add_ind1)
            if len(halloffame.items) > 0 and halloffame.items[-1].fitness.values[0] >= 0.9999:
                print(halloffame.items[-1])
                print(halloffame.items[-1].fitness.values[0])
                break
        random.setstate(rst)

        rst = random.getstate()
        """Dynamic output"""
        record = stats.compile_(population) if stats else {}
        logbook.record(gen=gen, pop=len(population), **record)
        if verbose:
            print(logbook.stream)
        random.setstate(rst)

        """crossover, mutate"""
        offspring = toolbox.select_gs(population, len_pop - elite_size)
        # Vary the pool of individuals
        offspring = varAnd(offspring, toolbox, cxpb, mutpb)

        rst = random.getstate()
        """re-run"""
        offspring.extend(add_ind)
        population[:] = offspring
        random.setstate(rst)

    store = Store()
    store.to_csv(data_all)

    return population, logbook
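# A minimal sketch of the elitism pattern used in eaSimple above: set the
# top-scoring individuals aside, vary only the remaining pool, then re-attach
# the elites so they survive crossover/mutation unchanged. The toy
# "individual = (score, payload)" representation and `evolve_step` are
# hypothetical stand-ins, not the toolbox operators themselves.
import random


def evolve_step(population, k_elite, vary):
    population = sorted(population, key=lambda ind: ind[0], reverse=True)
    elites = population[:k_elite]                       # preserved as-is
    offspring = [vary(ind) for ind in population[k_elite:]]
    return elites + offspring


toy_pop = [(random.random(), i) for i in range(10)]
toy_pop = evolve_step(toy_pop, k_elite=2,
                      vary=lambda ind: (ind[0] * random.random(), ind[1]))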
"""union""" index_all = [tuple(index[0]) for _ in index_all for index in _[:10]] index_all = list(set(index_all)) """get x_name and abbr""" index_all_name = name_to_name(X_frame.columns.values, search=[i for i in index_all], search_which=0, return_which=(1,), two_layer=True) index_all_name = [list(set([re.sub(r"_\d", "", j) for j in i])) for i in index_all_name] [i.sort() for i in index_all_name] index_all_abbr = name_to_name(name_init, abbr_init, search=index_all_name, search_which=1, return_which=2, two_layer=True) store.to_pkl_pd(index_all, "index_all") store.to_csv(index_all_name, "index_all_name") store.to_csv(index_all_abbr, "index_all_abbr") ugs = UGS(estimator_all, index_all, estimator_n=[2, 3], n_jobs=3) ugs.fit(X, y) # re = gs.cv_score_all(index_all) binary_distance = ugs.cal_binary_distance_all(index_all, estimator_i=3) # slice_k = gs._cv_predict_all(estimator_i=3) groups = ugs.cal_group(estimator_i=3, printing=True, print_noise=0.2, pre_binary_distance_all=binary_distance) ugs.cluster_print(binary_distance, highlight=[1, 2, 3]) # groups = ugs.cal_t_group(printing=False, pre_group=None) # ss=ugs.select_ugs(alpha=0.01) # results = gs.select_gs(alpha=0.01) # gs.cal_group(eps=0.10, estimator_i=1, printing=True, pre_binary_distance_all=slice_g, print_noise=0.1, # node_name=index_all_abbr)
scatter(ytest, y_pre_test, strx='y_true($10^4$T)', stry='y_predict($10^4$T)')
scatter(ytrain, y_pre_train, strx='y_true($10^4$T)', stry='y_predict($10^4$T)')


def scatter2(x, y_true, y_predict, strx='y_true', stry1='y_true(GWh)', stry2='y_predict', stry="y"):
    fig = plt.figure()
    ax = fig.add_subplot(111)
    l1 = ax.scatter(x, y_true, marker='o', s=50, alpha=0.7, c='orange', linewidths=None, edgecolors='blue')
    ax.plot(x, y_true, '-', ms=5, lw=2, alpha=0.7, color='black')
    l2 = ax.scatter(x, y_predict, marker='^', s=50, alpha=0.7, c='green', linewidths=None, edgecolors='blue')
    ax.plot(x, y_predict, '-', ms=5, lw=2, alpha=0.7, color='green')
    # ax.plot([min(x), max(x)], [min(x), max(x)], '--', ms=5, lw=2, alpha=0.7, color='black')
    plt.xlabel(strx)
    plt.legend((l1, l2), (stry1, stry2), loc='upper left')
    plt.ylabel(stry)
    plt.show()


a = np.arange(2000, 2020)
scatter2(a, y[::-1], y_[::-1], strx='year', stry="y($10^4$T)",
         stry1='y_true($10^4$T)', stry2='y_predict($10^4$T)')

# export
print(x_frame.iloc[:, :].columns.values[ba.support_])
store.to_pkl_sk(ba.estimator_, "model")
all_import["y_predict"] = y_
store.to_csv(all_import, "predict")
dim_init = data.dims

index_all_name = name_to_name(X_frame.columns.values, search=[i for i in index_slice],
                              search_which=0, return_which=(1,), two_layer=True)
index_all_name = [list([re.sub(r"_\d", "", j) for j in i]) for i in index_all_name]

index_all_dim = name_to_name(name_init, dim_init, search=index_all_name,
                             search_which=1, return_which=2, two_layer=True)

dim_target = [dimension_check(list(dim), np.array([1, 2, -2, 0, 0, 0, 0])) for dim in index_all_dim]
dim_1 = [dimension_check(dim) for dim in index_all_dim]

result['dim1'] = dim_1
result['dim_target'] = dim_target
result = result.sort_values(by="all_mean", ascending=False)

store.to_csv(result, "result")
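# A sketch of what the dimension checks above are assumed to compute: each
# feature carries a vector of SI base-unit exponents, an expression is
# "on target" when its exponent vector matches the target vector, and
# "dimensionless" when all exponents are zero. `check_dim` is a hypothetical
# stand-in for dimension_check.
import numpy as np


def check_dim(dim, target=None):
    dim = np.asarray(dim)
    if target is None:                      # dimensionless test
        return bool(np.all(dim == 0))
    return bool(np.array_equal(dim, np.asarray(target)))


# e.g. a pressure-like exponent vector against the energy-like target [1, 2, -2, 0, 0, 0, 0]
print(check_dim([1, -1, -2, 0, 0, 0, 0], target=[1, 2, -2, 0, 0, 0, 0]))  # False
print(check_dim([0, 0, 0, 0, 0, 0, 0]))                                   # True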
                batch_size=40, re_hall=3,
                n_jobs=12, mate_prob=0.9, max_value=5,
                mutate_prob=0.8, tq=False, dim_type="coef",
                re_Tree=0, store=False, random_state=12,
                verbose=True,
                stats={"fitness_dim_max": ["max"], "dim_is_target": ["sum"]},
                add_coef=True, inner_add=False,
                cal_dim=True, vector_add=True,
                personal_map=False)

# b = time.time()
exps = bl.run()
print([i.coef_expr for i in exps])
score = exps.keys[0].values[0]
name = group_str(exps[0], pset0, feature_name=True)
dicts["s%s" % i] = [score, name]
print(i)

store.to_csv(dicts, model="a+")
]
select = ['volume'] + [j + "_%i" % i for j in select[1:] for i in range(2)]

X_frame = data225_import[select]
y_frame = data225_import['exp_gap']

X = X_frame.values
y = y_frame.values

name, rep_name = getName(X_frame)
x0, x1, x2, x3, x4, x5, x6 = rep_name

expr01 = sympy.log(1 / (x1 + x2) * x0 / (x5 + x6) * x4 / x3)

results = calculateExpr(expr01, pset=None, x=X, y=y, score_method=r2_score,
                        add_coeff=True, del_no_important=False,
                        filter_warning=True, terminals=rep_name,
                        inter_add=True, iner_add=False, random_add=False)

print(select)
print(results)
store.to_csv(data216_225_221import, "plot221225216")
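# A rough sketch of the kind of evaluation calculateExpr is assumed to do:
# turn a sympy expression over the feature symbols into a numeric function,
# evaluate it column-wise on X, and score against y with r2_score. The toy
# arrays and the two-symbol expression are hypothetical.
import numpy as np
import sympy
from sklearn.metrics import r2_score

a, b = sympy.symbols("a b")
expr_demo = sympy.log(a / b)

X_demo = np.random.rand(50, 2) + 0.1            # keep the log argument positive
y_demo = np.log(X_demo[:, 0] / X_demo[:, 1]) + 0.05 * np.random.rand(50)

func = sympy.lambdify((a, b), expr_demo, modules="numpy")
pred = func(X_demo[:, 0], X_demo[:, 1])
print(r2_score(y_demo, pred))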
param_grid3 = [{'n_estimators': [100, 200], 'learning_rate': [0.1, 0.05]}]

# 2 model
ref = RFECV(me2, cv=3)
x_ = ref.fit_transform(x, y)

gd = GridSearchCV(me2, cv=3, param_grid=param_grid2, scoring="r2", n_jobs=1)
gd.fit(x_, y)
score = gd.best_score_

# 1, 3 model
# gd = GridSearchCV(me1, cv=3, param_grid=param_grid1, scoring="r2", n_jobs=1)
# gd.fit(x, y)
# es = gd.best_estimator_
# sf = SelectFromModel(es, threshold=None, prefit=False,
#                      norm_order=1, max_features=None)
# sf.fit(x, y)
# feature = sf.get_support()
#
# gd.fit(x[:, feature], y)
# score = gd.best_score_

# other models
# exhaustive search, etc. ...

# export
# pd.to_pickle(gd, r'C:\Users\Administrator\Desktop\skk\gd_model')
# pd.read_pickle(r'C:\Users\Administrator\Desktop\skk\gd_model')

store.to_pkl_sk(gd)
store.to_csv(x)
store.to_txt(score)
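# A self-contained sklearn sketch of the "2 model" route above: recursive
# feature elimination with cross-validation first, then a grid search over
# the reduced feature set. The toy data and the gradient-boosting estimator
# are placeholders for me2 and the real design matrix.
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.feature_selection import RFECV
from sklearn.model_selection import GridSearchCV

x_demo, y_demo = make_regression(n_samples=200, n_features=10, n_informative=4, random_state=0)
est = GradientBoostingRegressor(random_state=0)

x_reduced = RFECV(est, cv=3).fit_transform(x_demo, y_demo)
gd_demo = GridSearchCV(est, cv=3, scoring="r2", n_jobs=1,
                       param_grid={'n_estimators': [100, 200], 'learning_rate': [0.1, 0.05]})
gd_demo.fit(x_reduced, y_demo)
print(gd_demo.best_score_, gd_demo.best_params_)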
x_rame = (all_import_title['electron number_0'] + all_import_title['electron number_1']) / all_import_title[
    'cell volume']
all_import_title.insert(10, "electron density", x_rame)

# store.to_csv(all_import_title, "all_import_title", reverse=False)

all_import = all_import_title.drop(
    ['name_number', "cell density", 'name_number', "name", "structure", "structure_type", "space_group",
     "reference", 'material_id', 'composition', "com_0", "com_1"], axis=1)

all_import = all_import.iloc[np.where(all_import['group_number'] == 225)[0]]
all_import = all_import.drop(['group_number'], axis=1)

store.to_csv(all_import, "all_import", transposition=False)


def get_abbr():
    name = ["electron density", "cell density", 'cell volume', "component"]
    abbrTex = [r"$\rho_e$", r"$\rho_c$", "$V_c$", "$com$"]
    abbr = [r"rho_e", r"rho_c", "V_c", "com"]
    for i, j, k in zip(name, abbrTex, abbr):
        name_and_abbr.insert(0, i, [j, k])


get_abbr()
store.to_csv(name_and_abbr, "name_and_abbr", transposition=False)
def eaSimple(population, toolbox, cxpb, mutpb, ngen, stats=None,
             halloffame=None, verbose=__debug__, pset=None, store=True):
    """
    Parameters
    ----------
    population : list
        List of individuals to evolve.
    toolbox : Toolbox
        Toolbox containing the evolution operators (selection, parallel evaluation, ...).
    cxpb : float
        Probability of mating two individuals.
    mutpb : float
        Probability of mutating an individual.
    ngen : int
        Number of generations.
    stats : Statistics, optional
        Object used to compile the per-generation record.
    halloffame : HallOfFame, optional
        Container keeping the best individuals found so far.
    verbose : bool
        Whether to print the logbook stream each generation.
    pset : PrimitiveSet, optional
        Primitive set used to build the expressions.
    store : bool
        Whether to store the per-generation data.

    Returns
    -------
    population, logbook
    """
    rst = random.getstate()
    len_pop = len(population)
    logbook = Logbook()
    logbook.header = [] + (stats.fields if stats else [])
    data_all = {}
    random.setstate(rst)

    for gen in range(1, ngen + 1):
        """evaluation"""
        rst = random.getstate()

        """score"""
        invalid_ind = [ind for ind in population if not ind.fitness.valid]
        fitnesses = toolbox.parallel(iterable=population)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit[0],
            ind.expr = fit[1]
            ind.dim = fit[2]
            ind.withdim = fit[3]
        random.setstate(rst)

        rst = random.getstate()
        """elite"""
        add_ind = []
        add_ind1 = toolbox.select_kbest_target_dim(population, K_best=0.01 * len_pop)
        add_ind2 = toolbox.select_kbest_dimless(population, K_best=0.01 * len_pop)
        add_ind3 = toolbox.select_kbest(population, K_best=5)
        add_ind += add_ind1
        add_ind += add_ind2
        add_ind += add_ind3
        elite_size = len(add_ind)
        random.setstate(rst)

        rst = random.getstate()
        """score"""
        if store:
            subp = functools.partial(sub, subed=pset.rep_name_list, subs=pset.real_name_list)
            data = {"gen{}_pop{}".format(gen, n): {"gen": gen, "pop": n,
                                                   "score": i.fitness.values[0],
                                                   "expr": str(subp(i.expr)),
                                                   } for n, i in enumerate(population) if i is not None}
            data_all.update(data)
        random.setstate(rst)

        rst = random.getstate()
        """record"""
        if halloffame is not None:
            halloffame.update(add_ind3)
            if len(halloffame.items) > 0 and halloffame.items[-1].fitness.values[0] >= 0.95:
                print(halloffame.items[-1])
                print(halloffame.items[-1].fitness.values[0])
                break
        random.setstate(rst)

        rst = random.getstate()
        """Dynamic output"""
        record = stats.compile(population) if stats else {}
        logbook.record(gen=gen, pop=len(population), **record)
        if verbose:
            print(logbook.stream)
        random.setstate(rst)

        """crossover, mutate"""
        offspring = toolbox.select_gs(population, len_pop - elite_size)
        # Vary the pool of individuals
        offspring = varAnd(offspring, toolbox, cxpb, mutpb)

        rst = random.getstate()
        """re-run"""
        offspring.extend(add_ind)
        population[:] = offspring
        random.setstate(rst)

    store = Store()
    store.to_csv(data_all)

    return population, logbook
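# A minimal sketch of the random.getstate()/random.setstate() bracketing used
# throughout eaSimple above: the state is captured before a block whose own
# randomness should not shift the main evolutionary stream, and restored
# afterwards, so the surrounding draws stay reproducible.
import random

random.seed(0)

rst = random.getstate()
_ = [random.random() for _ in range(100)]   # side work that consumes random numbers
random.setstate(rst)                        # rewind: the side work leaves no trace

print(random.random())   # identical to what would have been drawn without the side work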
clf = Exhaustion(estimator, n_select=n_select, muti_grade=2, muti_index=[2, X.shape[1]],
                 must_index=None, n_jobs=1, refit=True).fit(X, y)

name_ = name_to_name(X_frame.columns.values, search=[i[0] for i in clf.score_ex[:10]],
                     search_which=0, return_which=(1,), two_layer=True)

sc = np.array(clf.scatter)

for i in clf.score_ex[:]:
    print(i[1])
for i in name_:
    print(i)

t = clf.predict(X)
p = BasePlot()
p.scatter(y, t, strx='True $E_{gap}$', stry='Calculated $E_{gap}$')
plt.show()
p.scatter(sc[:, 0], sc[:, 1], strx='Number', stry='Score')
plt.show()

store.to_csv(sc, method_name + "".join([str(i) for i in n_select]))
store.to_pkl_pd(clf.score_ex, method_name + "".join([str(i) for i in n_select]))
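# A generic sketch of the exhaustive subset search Exhaustion is assumed to
# perform: score every feature combination of a fixed size by cross-validation
# and keep the ranking. The toy data, Ridge estimator, and subset size are
# placeholders; real runs restrict the subset sizes for tractability.
from itertools import combinations

from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score

x_demo, y_demo = make_regression(n_samples=120, n_features=6, n_informative=3, random_state=1)

scores = []
for subset in combinations(range(x_demo.shape[1]), 2):
    cv_score = cross_val_score(Ridge(), x_demo[:, list(subset)], y_demo, cv=3, scoring="r2").mean()
    scores.append((subset, cv_score))

scores.sort(key=lambda t: t[1], reverse=True)
print(scores[:5])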
com_data = pd.read_excel(r'C:\Users\Administrator\Desktop\band_gap_exp_last\init_band_data.xlsx',
                         sheet_name='binary_4_structure', header=0, skiprows=None, index_col=0, names=None)

composition = pd.Series(map(eval, com_data['composition']))
composition_mp = pd.Series(map(mg.Composition, composition))

"""for element site"""
com_mp = pd.Series([i.to_reduced_dict for i in composition_mp])
# com_mp = composition_mp

all_import = data.csv.all_import
id_structures = data.id_structures
structures = id_structures

vor_area = count_voronoinn(structures, mess="area")
vor_dis = count_voronoinn(structures, mess="face_dist")

vor = pd.DataFrame()
vor.insert(0, 'vor_area0', vor_area[:, 0])
vor.insert(0, 'face_dist0', vor_dis[:, 0])
vor.insert(0, 'vor_area1', vor_area[:, 1])
vor.insert(0, 'face_dist1', vor_dis[:, 1])

data_title = all_import[
    ['name_number', "x_name", "structure", "structure_type", "space_group", "reference",
     'material_id', 'composition', 'exp_gap', 'group_number']]
data_tail = all_import.drop(
    ['name_number', "x_name", "structure", "structure_type", "space_group", "reference",
     'material_id', 'composition', 'exp_gap', 'group_number'], axis=1)

data_import = data_title.join(vor[["face_dist0", "vor_area0", "face_dist1", "vor_area1"]])
data_import = data_import.join(data_tail)

store.to_csv(data_import, "all_import")
all_import_title = com_data.join(ele_ratio)
all_import_title = all_import_title.join(depart_elements_table)

"""replace cell density with electron density"""
select2 = ['electron number_0', 'electron number_1', 'cell volume']
x_rame = (all_import_title['electron number_0'] + all_import_title['electron number_1']) / all_import_title[
    'cell volume']
all_import_title['cell density'] = x_rame
all_import_title.rename(columns={'cell density': "electron density"}, inplace=True)

name = ["electron density" if i == "cell density" else i for i in name_and_abbr[0]]
abbr = [r"$\rho_e$" if i == r"$\rho_c$" else i for i in name_and_abbr[1]]
name_and_abbr = [name, abbr]
dims[-3] = np.array([0, -3, 0, 0, 0, 0, 0])

store.to_csv(all_import_title, "all_import_title")

all_import = all_import_title.drop(
    ['name_number', 'name_number', "name", "structure", "structure_type", "space_group",
     "reference", 'material_id', 'composition', "com_0", "com_1"], axis=1)

store.to_pkl_pd(dims, "dims")
store.to_pkl_pd(name_and_abbr, "name_and_abbr")
store.to_csv(all_import, "all_import")
# preprocessing
# minmax = MinMaxScaler()
# x = minmax.fit_transform(x)
x_, y_ = shuffle(x, y, random_state=2)

# modelling
method_all = ['SVR-set', "GPR-set", "RFR-em", "AdaBR-em", "DTR-em", "LASSO-L1", "BRR-L1"]
methods = method_pack(method_all=method_all, me="reg", gd=True)

pre_y = []
ests = []

for name, methodi in zip(method_all, methods):
    methodi.cv = 5
    methodi.scoring = "neg_root_mean_squared_error"
    gd = methodi.fit(X=x_, y=y_)
    score = gd.best_score_
    est = gd.best_estimator_
    print(name, "neg_root_mean_squared_error", score)
    score = cross_val_score(est, X=x_, y=y_, scoring="r2").mean()
    print(name, "r2", score)
    pre_yi = est.predict(x)
    pre_y.append(pre_yi)
    ests.append(est)
    store.to_pkl_pd(est, name)

pre_y.append(y)
pre_y = np.array(pre_y).T
pre_y = pd.DataFrame(pre_y)
pre_y.columns = method_all + ["real_y"]
store.to_csv(pre_y, "wrtem_result")
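# A plain-sklearn sketch of the model-comparison loop above, without the
# method_pack helper: grid-search a few regressors, report the r2 of the best
# estimator of each, and collect their predictions into one DataFrame. The
# toy data, the SVR/RFR candidates, and their grids are placeholders for the
# named methods.
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.svm import SVR

x_demo, y_demo = make_regression(n_samples=150, n_features=8, random_state=2)

candidates = {
    "SVR": (SVR(), {"C": [1, 10]}),
    "RFR": (RandomForestRegressor(random_state=2), {"n_estimators": [100, 200]}),
}

pred_cols = {}
for name_i, (model, grid) in candidates.items():
    gd_i = GridSearchCV(model, grid, cv=5, scoring="neg_root_mean_squared_error")
    gd_i.fit(x_demo, y_demo)
    best = gd_i.best_estimator_
    print(name_i, "r2", cross_val_score(best, x_demo, y_demo, scoring="r2", cv=5).mean())
    pred_cols[name_i] = best.predict(x_demo)

pred_cols["real_y"] = y_demo
print(pd.DataFrame(pred_cols).head())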
pre_y = sl.predict(x)

r = np.corrcoef(np.vstack((pre_y, y)))[1, 0]
error = np.mean(np.abs((y - pre_y) / y))
r2 = sl.score(x, y, "r2")
mae = sl.score(x, y, "neg_mean_absolute_error")

sl.loop.cpset.cv = 5
r2_cv = sl.cv_result(refit=False)
print("r:{}, error:{}, r2:{}, MAE:{}, r2_cv:{}".format(r, error, r2, mae, r2_cv[0]))

data = sl.loop.top_n(20, ascending=False)
st.end()
st.to_csv(data, file_new_name="top_n")

# if __name__ == "__main__":
#     pa_factor, pa_dim = Dim.convert_to(10 * 6 * pa)
#
#     ########### first case ###########
#     """data"""
#     com_data = pd.read_csv(r'FCC-BCC.csv')
#     x = com_data.iloc[:, :-1].values
#     y = com_data.iloc[:, -1].values
#     x, y = shuffle(x, y, random_state=0)
#
#     st = Store("FCC-BCC_result_error_no_intercept")
#     st.start()
#     sl = SymbolLearning(loop=r'MultiMutateLoop', cal_dim=True, pop=5000, dim_type=pa_dim,
#                         gen=50, add_coef=True, re_hall=2,
#                         inter_add=False, batch_size=50,
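# A numpy sketch of the hand-rolled metrics printed above: Pearson r taken
# from the correlation matrix of the stacked prediction/target rows, and the
# mean absolute relative error. The toy arrays are placeholders.
import numpy as np

y_demo = np.array([1.0, 2.0, 3.0, 4.0])
pred_demo = np.array([1.1, 1.9, 3.2, 3.8])

r_demo = np.corrcoef(np.vstack((pred_demo, y_demo)))[1, 0]
rel_err = np.mean(np.abs((y_demo - pred_demo) / y_demo))
print("r:{}, relative error:{}".format(r_demo, rel_err))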