def to_csv(self, data_all):
    """Persist the collected run data to a csv file.

    Does nothing when ``self.store`` is falsy. When ``self.store`` is a
    string it is used as the output directory, otherwise the current
    working directory is used. On an I/O or permission failure the write
    is retried in the current working directory.

    Parameters
    ----------
    data_all : dict
        Accumulated data to be written (passed straight to
        ``Store.to_csv`` with ``transposition=True``).
    """
    if not self.store:
        return
    # Directory: explicit path if given, else cwd.
    target_dir = self.store if isinstance(self.store, str) else os.getcwd()
    # File name encodes the run parameters plus a timestamp for uniqueness.
    file_new_name = "_".join(
        map(str, (self.pop, self.gen, self.mutate_prob, self.mate_prob, time.time()))
    )
    try:
        Store(target_dir).to_csv(data_all, file_new_name, transposition=True)
        print("store data to ", target_dir, file_new_name)
    except (IOError, PermissionError):
        # Fall back to the current working directory if the target is unwritable.
        fallback_dir = os.getcwd()
        Store(fallback_dir).to_csv(data_all, file_new_name, transposition=True)
        print("store data to ", fallback_dir, file_new_name)
import pandas as pd from bgp.featurizers.compositionfeaturizer import WeightedAverage from bgp.selection.corr import Corr from mgetool.exports import Store from mgetool.imports import Call from pymatgen import Composition from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor from sklearn.feature_selection import RFECV from sklearn.linear_model import BayesianRidge from sklearn.model_selection import GridSearchCV from sklearn.preprocessing import MinMaxScaler from sklearn.tree import DecisionTreeRegressor # 数据导入 store = Store(r'C:\Users\Administrator\Desktop\skk') data = Call(r'C:\Users\Administrator\Desktop\skk') all_import = data.csv().skk # """for element site""" element_table = pd.read_excel( r'C:\Users\Administrator\Desktop\band_gap_exp\element_table.xlsx', header=4, skiprows=0, index_col=0) element_table = element_table.iloc[5:, 7:] # 其他数据获取 feature_select = [ 'lattice constants a', 'lattice constants b', 'lattice constants c',
# print("r:{},error:{},r2:{},MAE:{},r2_cv:{}".format(r, error, r2, mae, r2_cv[0])) # # data = sl.loop.top_n(20, ascending=False) # st.end() # st.to_csv(data, file_new_name="top_n") if __name__ == "__main__": pa_factor, pa_dim = Dim.convert_to(10 * 6 * pa) ###########第一个########### """数据""" com_data = pd.read_csv(r'FCC.csv') x = com_data.iloc[:, :-1].values y = com_data.iloc[:, -1].values x, y = shuffle(x, y, random_state=0) st = Store("FCC_result_error_no_intercept") st.start() sl = SymbolLearning(loop=r'MultiMutateLoop', cal_dim=False, dim_type=pa_dim, pop=5000, gen=50, add_coef=True, re_hall=2, inter_add=False, random_state=2, n_jobs=16, initial_max=2, max_value=4, store=True, stats={"fitness_dim_max": ("max", )})
# p = BasePlot(font=None) # p.scatter(y, y_pre, strx='Experimental $Frequency$', stry='Calculated $Frequency$') # import matplotlib.pyplot as plt # # plt.show() X = q1[:, :-1] y = q1[:, -1] X = np.concatenate((X, (X[:, 1] / X[:, 0]**2).reshape(-1, 1)), axis=1) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0) store = Store() # symbolset pset0 = SymbolSet() pset0.add_features(X_train, y_train) pset0.add_constants(c=[ 1, ]) pset0.add_operations( power_categories=(2, ), categories=("Add", "exp", "Neg"), ) h_bgp = 3 # stop = None
def eaSimple(population, toolbox, cxpb, mutpb, ngen, stats=None,
             halloffame=None, verbose=__debug__, pset=None, store=True):
    """Run a simple evolutionary loop with elitism on the target dimension.

    Each generation: score the population in parallel, keep the top 5% with
    the target dimension as elites, update the hall of fame, log statistics,
    then produce the next generation by selection + crossover/mutation.
    Stops early once the best hall-of-fame fitness reaches 0.9999.

    Parameters
    ----------
    population : list
        Initial population of individuals (mutated in place).
    toolbox
        Toolbox providing ``parallel``, ``select_kbest_target_dim`` and
        ``select_gs`` (DEAP-style).
    cxpb : float
        Crossover probability (passed to ``varAnd``).
    mutpb : float
        Mutation probability (passed to ``varAnd``).
    ngen : int
        Number of generations.
    stats
        Statistics object with ``fields`` and ``compile_``; optional.
    halloffame
        Hall of fame to update; optional.
    verbose : bool
        Print the logbook stream each generation.
    pset
        Primitive set; unused in this variant — TODO confirm.
    store : bool
        NOTE(review): this flag is never read; the final lines rebind the
        name ``store`` to a new ``Store()`` and always write — confirm intent.

    Returns
    -------
    tuple
        ``(population, logbook)``.
    """
    # Save/restore the RNG state around each phase so that the random
    # stream consumed by one phase does not perturb the next.
    rst = random.getstate()
    len_pop = len(population)
    logbook = Logbook()
    logbook.header = [] + (stats.fields if stats else [])
    data_all = {}
    random.setstate(rst)
    for gen in range(1, ngen + 1):
        "评价"  # (Chinese: "evaluate") — section marker, no-op statement
        rst = random.getstate()
        """score"""
        invalid_ind = [ind for ind in population if not ind.fitness.valid]
        # NOTE(review): the whole population is evaluated but results are
        # zipped against invalid_ind only — if any individual already has a
        # valid fitness the pairing is misaligned. Confirm toolbox.parallel
        # semantics before relying on cached fitnesses.
        fitnesses = toolbox.parallel(iterable=population)
        for ind, fit, in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit[0],
            ind.expr = fit[1]
            ind.y_dim = fit[2]
            ind.withdim = fit[3]
        random.setstate(rst)
        rst = random.getstate()
        """elite"""
        add_ind = []
        # Keep the best 5% of individuals whose dimension matches the target.
        add_ind1 = toolbox.select_kbest_target_dim(population,
                                                   K_best=0.05 * len_pop)
        add_ind += add_ind1
        elite_size = len(add_ind)
        random.setstate(rst)
        rst = random.getstate()
        """score"""
        random.setstate(rst)
        rst = random.getstate()
        """record"""
        if halloffame is not None:
            halloffame.update(add_ind1)
            # Early stop once the best individual is (near-)perfect.
            if len(halloffame.items
                   ) > 0 and halloffame.items[-1].fitness.values[0] >= 0.9999:
                print(halloffame.items[-1])
                print(halloffame.items[-1].fitness.values[0])
                break
        random.setstate(rst)
        rst = random.getstate()
        """Dynamic output"""
        record = stats.compile_(population) if stats else {}
        logbook.record(gen=gen, pop=len(population), **record)
        if verbose:
            print(logbook.stream)
        random.setstate(rst)
        """crossover, mutate"""
        # Elites are re-added below, so select only len_pop - elite_size here.
        offspring = toolbox.select_gs(population, len_pop - elite_size)
        # Vary the pool of individuals
        offspring = varAnd(offspring, toolbox, cxpb, mutpb)
        rst = random.getstate()
        """re-run"""
        offspring.extend(add_ind)
        population[:] = offspring
        random.setstate(rst)
    # NOTE(review): rebinds the `store` parameter; runs unconditionally.
    store = Store()
    store.to_csv(data_all)
    return population, logbook
import warnings import numpy as np import pandas as pd from bgp.selection.ugs import UGS from mgetool.exports import Store from mgetool.imports import Call from mgetool.quickmethod import dict_method_reg from mgetool.tool import name_to_name from sklearn import preprocessing, utils from sklearn.model_selection import GridSearchCV warnings.filterwarnings("ignore") if __name__ == '__main__': store = Store(r'C:\Users\Administrator\Desktop\band_gap_exp\3.sum') data = Call(r'C:\Users\Administrator\Desktop\band_gap_exp') data_import = data.csv.all_import name_init, abbr_init = data.csv.name_and_abbr select = ['volume', 'destiny', 'lattice constants a', 'lattice constants c', 'radii covalent', 'radii ionic(shannon)', 'distance core electron(schubert)', 'latent heat of fusion', 'energy cohesive brewer', 'total energy', 'charge nuclear effective(slater)', 'valence electron number', 'electronegativity(martynov&batsanov)', 'volume atomic(villars,daams)'] select = ['volume', 'destiny'] + [j + "_%i" % i for j in select[2:] for i in range(2)] data216_import = data_import.iloc[np.where(data_import['group_number'] == 216)[0]] data225_import = data_import.iloc[np.where(data_import['group_number'] == 225)[0]] data216_225_import = pd.concat((data216_import, data225_import))
from multiego.ego import Ego if __name__ == "__main__": from sklearn.datasets import load_boston import numpy as np from multiego.multiplyego import search_space, MultiplyEgo from sklearn.model_selection import GridSearchCV from sklearn.svm import SVR #####model2##### parameters = {'C': [1, 10]} model2 = GridSearchCV(SVR(), parameters) me = BaseMultiplyEgo() st = Store() np.random.seed(0) n = 3 y = np.random.random(size=(100, n)) me.pareto_front_point(y, sign=None) yall = np.random.random(size=(200, 1, n)) yall = yall + np.random.random(size=(200, 1000, n)) / 20 re = me.rank(y=y, predict_y_all=yall)
[ # Rct ** (-1) - beta * F ** 2 / (R * T)*(k1p * (1 - Thetah) - k_1p * Thetah + k2p * Thetah), # taup ** (-1) - F / q * (4 * k3 * Thetah + k1p + k_1p + k2p), Thetah - ((k1p + k_1p + k2p) + sympy.sqrt( (k1p + k_1p + k2p)**2) + 8 * k1p * k3), k1p - k1 * sympy.exp(-beta * F * E / (R * T)), k_1p - k_1 * sympy.exp((1 - beta) * F * E / (R * T)), k2p - k2 * sympy.exp(-beta * F * E / (R * T)), ], [Thetah, k1p, k_1p, k2p]) print(result) from mgetool.exports import Store store = Store(r'C:\Users\Administrator\Desktop\cl') store.to_pkl_pd(result, "result") """fitting""" exps1 = (beta * F**2 / (R * T) * (k1p * (1 - Thetah) - k_1p * Thetah + k2p * Thetah))**(-1) exps2 = (F / q * (4 * k3 * Thetah + k1p + k_1p + k2p))**(-1) exps3 = (beta * F**2 / (R * T) * (k2p - k1p - k_1p) * (k1p * (1 - Thetah) - k_1p * Thetah + k2p * Thetah) / (4 * k3 * Thetah + k2p + k1p + k_1p))**(-1) subbb1 = { Thetah: result[0][0], } subbb2 = { k1p: result[0][1], k_1p: result[0][2],
import pandas as pd from bgp.selection.quickmethod import method_pack from mgetool.exports import Store from sklearn.preprocessing import MinMaxScaler from sklearn.utils import shuffle # 数据导入 store = Store(r'C:\Users\Administrator\Desktop/wr') # """for element site""" data = pd.read_excel(r'C:\Users\Administrator\Desktop/wr/wrpvc.xlsx', header=0, skiprows=None, index_col=0) y = data["t"].values x_p_name = ['t_t', 'v', 'b', 'hat', 'd', 't1'] x = data[x_p_name].values x_name = ["温度刻度", "速度刻度", "风量", "加盖", "焊口距地距离", "焊接前出风口温度"] # # # 预处理 minmax = MinMaxScaler() x = minmax.fit_transform(x) x, y = shuffle(x, y) # m_corr = Corr(threshold=0.85, muti_grade=None, muti_index=None, must_index=None) # m_corr.fit(x) # corr = m_corr.cov
# @Author : Administrator # @Project : feature_toolbox # @FileName: 1.1add_compound_features.py # @Software: PyCharm import pandas as pd import pymatgen as mg from bgp.featurizers.voronoifeature import count_voronoinn from mgetool.exports import Store from mgetool.imports import Call """ this is a description """ store = Store(r'C:\Users\Administrator\Desktop\band_gap_exp_last\1.generate_data') data = Call(r'C:\Users\Administrator\Desktop\band_gap_exp_last\1.generate_data') com_data = pd.read_excel(r'C:\Users\Administrator\Desktop\band_gap_exp_last\init_band_data.xlsx', sheet_name='binary_4_structure', header=0, skiprows=None, index_col=0, names=None) composition = pd.Series(map(eval, com_data['composition'])) composition_mp = pd.Series(map(mg.Composition, composition)) """for element site""" com_mp = pd.Series([i.to_reduced_dict for i in composition_mp]) # com_mp = composition_mp all_import = data.csv.all_import id_structures = data.id_structures structures = id_structures vor_area = count_voronoinn(structures, mess="area") vor_dis = count_voronoinn(structures, mess="face_dist") vor = pd.DataFrame()
# @Project : feature_toolbox # @FileName: 3.0select_method.py # @Software: PyCharm import numpy as np import pandas as pd from bgp.featurizers.compositionfeaturizer import DepartElementFeaturizer from mgetool.exports import Store # from pymatgen import Composition """ this is a description """ if __name__ == "__main__": store = Store(r'C:\Users\Administrator\Desktop\band_gap_exp') com_data = pd.read_excel( r'C:\Users\Administrator\Desktop\band_gap_exp\init_band_data.xlsx', sheet_name='binary_4_structure') """for element site""" element_table = pd.read_excel( r'C:\Users\Administrator\Desktop\band_gap_exp\element_table.xlsx', header=4, skiprows=0, index_col=0) """get x_name and abbr""" def get_abbr(): abbr = list(element_table.loc["abbrTex"]) name = list(element_table.columns)
import numpy as np import pandas as pd from bgp.selection.quickmethod import method_pack from mgetool.exports import Store from sklearn.model_selection import cross_val_score # from sklearn.preprocessing import MinMaxScaler from sklearn.utils import shuffle # 数据导入 store = Store(r'/data/home/wangchangxin/data/wr/tem') data = pd.read_excel(r'/data/home/wangchangxin/data/wr/tem/wrtem2.xlsx', header=0, skiprows=None, index_col=0) y = data["S"].values x_p_name = ["t", 'v', 'hat'] x = data[x_p_name].values # # # 预处理 # minmax = MinMaxScaler() # x = minmax.fit_transform(x) x_, y_ = shuffle(x, y, random_state=2) # # # 建模 method_all = ['SVR-set', "GPR-set", "RFR-em", "AdaBR-em", "DTR-em", "LASSO-L1", "BRR-L1"] methods = method_pack(method_all=method_all, me="reg", gd=True) pre_y = [] ests = [] for name, methodi in zip(method_all, methods):
def eaSimple(population, toolbox, cxpb, mutpb, ngen, stats=None,
             halloffame=None, verbose=__debug__, pset=None, store=True):
    """Run a simple generational EA, archiving the top-5 hall-of-fame
    expressions each generation and writing them to a text file at the end.

    Stops early once the best hall-of-fame fitness reaches 0.95. The RNG is
    re-seeded with the same value around hall-of-fame updates so those
    updates do not perturb the evolutionary random stream.

    Parameters
    ----------
    population : list
        Initial population (mutated in place).
    toolbox
        Toolbox providing ``map``, ``evaluate`` and ``select_gs``
        (DEAP-style).
    cxpb : float
        Crossover probability (passed to ``varAnd``).
    mutpb : float
        Mutation probability (passed to ``varAnd``).
    ngen : int
        Number of generations.
    stats
        Statistics object with ``fields`` and ``compile_``; optional.
    halloffame
        Hall of fame to update; optional.
    verbose : bool
        Print the logbook stream each generation.
    pset
        Primitive set used to back-substitute readable feature names into
        the stored expressions; optional.
    store : bool
        When truthy, archive the top-5 hall-of-fame entries per generation.
        NOTE(review): the name is later rebound to a ``Store()`` instance
        and the final write happens unconditionally — confirm intent.

    Returns
    -------
    tuple
        ``(population, logbook)``.
    """
    len_pop = len(population)
    logbook = Logbook()
    logbook.header = ['gen', 'nevals'] + (stats.fields if stats else [])
    random_seed = random.randint(1, 1000)
    # Evaluate the individuals with an invalid fitness
    invalid_ind = [ind for ind in population if not ind.fitness.valid]
    fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
    # fitnesses = parallelize(n_jobs=4, func=toolbox.evaluate, iterable=invalid_ind)
    for ind, fit in zip(invalid_ind, fitnesses):
        ind.fitness.values = fit[0],
        ind.expr = fit[1]
    if halloffame is not None:
        halloffame.update(population)
    # Restore a known seed so hall-of-fame bookkeeping doesn't shift the stream.
    random.seed(random_seed)
    record = stats.compile_(population) if stats else {}
    logbook.record(gen=0, nevals=len(invalid_ind), **record)
    if verbose:
        print(logbook.stream)
    data_all = {}
    # Begin the generational process
    for gen in range(1, ngen + 1):
        if store:
            # NOTE(review): accesses halloffame.items without a None check —
            # store=True with halloffame=None would raise. Confirm callers.
            if pset:
                # Substitute internal rep names with user-facing names.
                subp = partial(sub, subed=pset.rep_name_list,
                               subs=pset.name_list)
                data = [{"score": i.fitness.values[0], "expr": subp(i.expr)}
                        for i in halloffame.items[-5:]]
            else:
                data = [{"score": i.fitness.values[0], "expr": i.expr}
                        for i in halloffame.items[-5:]]
            data_all['gen%s' % gen] = data
        # select_gs the next generation individuals
        offspring = toolbox.select_gs(population, len_pop)
        # Vary the pool of individuals
        offspring = varAnd(offspring, toolbox, cxpb, mutpb)
        if halloffame is not None:
            # Elitism: re-inject all hall-of-fame members into the offspring.
            offspring.extend(halloffame)
        random_seed = random.randint(1, 1000)
        # Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
        # fitnesses = parallelize(n_jobs=4, func=toolbox.evaluate, iterable=invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit[0],
            ind.expr = fit[1]
        # Update the hall of fame with the generated individuals
        if halloffame is not None:
            halloffame.update(offspring)
            # Early stop once the best individual is good enough.
            if halloffame.items[-1].fitness.values[0] >= 0.95:
                print(halloffame.items[-1])
                print(halloffame.items[-1].fitness.values[0])
                break
        random.seed(random_seed)
        # Replace the current population by the offspring
        population[:] = offspring
        # Append the current generation statistics to the logbook
        record = stats.compile_(population) if stats else {}
        logbook.record(gen=gen, nevals=len(invalid_ind), **record)
        if verbose:
            print(logbook.stream)
    # NOTE(review): rebinds the `store` parameter; write is unconditional.
    store = Store()
    store.to_txt(data_all)
    return population, logbook
def multiEaSimple(population, toolbox, cxpb, mutpb, ngen, stats=None,
                  halloffame=None, verbose=__debug__, pset=None, store=True,
                  alpha=1):
    """Run a generational EA with two objectives blended into one fitness.

    Each individual is scored by two evaluators (``toolbox.evaluate`` and
    ``toolbox.evaluate2``, both run via ``parallelize`` over the string form
    of the individual); the scalar fitness is ``(alpha * a + b) / 2``.
    Stops early once the best blended fitness reaches 0.95; optionally
    archives the top-2 hall-of-fame entries per generation and writes them
    to a text file.

    Parameters
    ----------
    population : list
        Initial population (mutated in place).
    toolbox
        Toolbox providing ``evaluate``, ``evaluate2`` and ``select_gs``.
    cxpb : float
        Crossover probability (passed to ``varAnd``).
    mutpb : float
        Mutation probability (passed to ``varAnd``).
    ngen : int
        Number of generations.
    stats
        Statistics object with ``fields`` and ``compile_``; optional.
    halloffame
        Hall of fame to update; optional.
    verbose : bool
        Print the logbook stream each generation.
    pset
        Primitive set used to back-substitute readable feature names into
        stored expressions; optional.
    store : bool
        When truthy, archive top-2 entries per generation and write at end.
    alpha : float
        Weight of the first objective in the blended fitness.

    Returns
    -------
    tuple
        ``(population, logbook)``.
    """
    logbook = Logbook()
    logbook.header = ['gen', 'nevals'] + (stats.fields if stats else [])
    # Evaluate the individuals with an invalid fitness
    invalid_ind = [ind for ind in population if not ind.fitness.valid]
    random_seed = random.randint(1, 1000)
    # fitnesses = list(toolbox.map(toolbox.evaluate, [str(_) for _ in invalid_ind]))
    # fitnesses2 = toolbox.map(toolbox.evaluate2, [str(_) for _ in invalid_ind])
    fitnesses = parallelize(n_jobs=6, func=toolbox.evaluate,
                            iterable=[str(_) for _ in invalid_ind])
    fitnesses2 = parallelize(n_jobs=6, func=toolbox.evaluate2,
                             iterable=[str(_) for _ in invalid_ind])

    def funcc(a, b):
        """Blend the two objective scores into one scalar fitness.

        Parameters
        ----------
        a : float
            Score from ``toolbox.evaluate``.
        b : float
            Score from ``toolbox.evaluate2``.

        Returns
        -------
        float
            ``(alpha * a + b) / 2``.
        """
        return (alpha * a + b) / 2

    for ind, fit, fit2 in zip(invalid_ind, fitnesses, fitnesses2):
        ind.fitness.values = funcc(fit[0], fit2[0]),
        # Keep the raw per-objective scores and expressions on the individual.
        ind.values = (fit[0], fit2[0])
        ind.expr = (fit[1], fit2[1])
    if halloffame is not None:
        halloffame.update(population)
    random.seed(random_seed)
    record = stats.compile_(population) if stats else {}
    logbook.record(gen=0, nevals=len(invalid_ind), **record)
    if verbose:
        print(logbook.stream)
    data_all = {}
    # Begin the generational process
    for gen in range(1, ngen + 1):
        # select_gs the next generation individuals
        offspring = toolbox.select_gs(population, len(population))
        # Vary the pool of individuals
        offspring = varAnd(offspring, toolbox, cxpb, mutpb)
        if halloffame is not None:
            # Elitism: re-inject the two best hall-of-fame members.
            offspring.extend(halloffame.items[-2:])
        # Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        random_seed = random.randint(1, 1000)
        # fitnesses = toolbox.map(toolbox.evaluate, [str(_) for _ in invalid_ind])
        # fitnesses2 = toolbox.map(toolbox.evaluate2, [str(_) for _ in invalid_ind])
        fitnesses = parallelize(n_jobs=6, func=toolbox.evaluate,
                                iterable=[str(_) for _ in invalid_ind])
        fitnesses2 = parallelize(n_jobs=6, func=toolbox.evaluate2,
                                 iterable=[str(_) for _ in invalid_ind])
        for ind, fit, fit2 in zip(invalid_ind, fitnesses, fitnesses2):
            ind.fitness.values = funcc(fit[0], fit2[0]),
            ind.values = (fit[0], fit2[0])
            ind.expr = (fit[1], fit2[1])
        # Update the hall of fame with the generated individuals
        if halloffame is not None:
            halloffame.update(offspring)
            # Early stop once the best blended fitness is good enough.
            if halloffame.items[-1].fitness.values[0] >= 0.95:
                print(halloffame.items[-1])
                print(halloffame.items[-1].fitness.values[0])
                print(halloffame.items[-1].values[0])
                print(halloffame.items[-1].values[1])
                break
        if store:
            if pset:
                # Substitute internal rep names with user-facing names.
                subp = partial(sub, subed=pset.rep_name_list,
                               subs=pset.name_list)
                data = [{"score": i.values[0], "expr": subp(i.expr[0])}
                        for i in halloffame.items[-2:]]
                data2 = [{"score": i.values[1], "expr": subp(i.expr[1])}
                         for i in halloffame.items[-2:]]
            else:
                data = [{"score": i.values[0], "expr": i.expr}
                        for i in halloffame.items[-2:]]
                # NOTE(review): i.expr is a 2-tuple above, so i.expr[2] looks
                # like an IndexError waiting to happen (i.expr[1] intended?).
                data2 = [{"score": i.values[1], "expr": i.expr[2]}
                         for i in halloffame.items[-2:]]
            data_all['gen%s' % gen] = list(zip(data, data2))
        random.seed(random_seed)
        # Replace the current population by the offspring
        population[:] = offspring
        # Append the current generation statistics to the logbook
        record = stats.compile_(population) if stats else {}
        logbook.record(gen=gen, nevals=len(invalid_ind), **record)
        if verbose:
            print(logbook.stream)
    if store:
        store1 = Store()
        store1.to_txt(data_all)
    return population, logbook
index_col=0) name = list(element_table.loc["x_name"]) abbr = list(element_table.loc["abbrTex"]) name.extend([ 'face_dist1', 'vor_area1', 'face_dist2', 'vor_area2', "destiny", 'volume', "ele_ratio" ]) abbr.extend([ '$d_{vf1}$', '$S_{vf1}$', '$d_{vf2}$', '$S_{vf2}$', r"$\rho_c$", "$V_c$", "$ele_ratio$" ]) index = [name.index(i) for i in X_frame_name] abbr = np.array(abbr)[index] return abbr store = Store( r'C:\Users\Administrator\Desktop\band_gap_exp_last\3.MMGS\3.2') data = Call( r'C:\Users\Administrator\Desktop\band_gap_exp_last\1.generate_data', r'C:\Users\Administrator\Desktop\band_gap_exp_last\3.MMGS') all_import_structure = data.csv.all_import_structure data_import = all_import_structure select = [ 'destiny', 'distance core electron(schubert)', 'energy cohesive brewer', 'volume atomic(villars,daams)', 'radii covalent', 'electronegativity(martynov&batsanov)', 'latent heat of fusion' ] select = ['destiny' ] + [j + "_%i" % i for j in select[1:] for i in range(2)]
import pandas as pd from bgp.combination.dimanalysis import dimension_check from bgp.selection.quickmethod import dict_method_reg from bgp.selection.sum import SUM from mgetool.exports import Store from mgetool.imports import Call from mgetool.tool import name_to_name from sklearn import utils from sklearn.model_selection import GridSearchCV warnings.filterwarnings("ignore") """ this is a description """ if __name__ == "__main__": store = Store(r'C:\Users\Administrator\Desktop\band_gap_exp\3.sum\sub') data = Call( r'C:\Users\Administrator\Desktop\band_gap_exp', r'C:\Users\Administrator\Desktop\band_gap_exp\3.sum\method', ) data_import = data.csv().all_import name_init, abbr_init = data.pickle_pd().name_and_abbr select = [ 'cell volume', 'electron density', 'lattice constants a', 'lattice constants c', 'radii covalent', 'radii ionic(shannon)', 'distance core electron(schubert)', 'latent heat of fusion', 'energy cohesive brewer', 'total energy', 'charge nuclear effective(slater)', 'valence electron number', 'electronegativity(martynov&batsanov)', 'volume atomic(villars,daams)' ]
from bgp.base import SymbolSet from bgp.calculation.translate import group_str from bgp.flow import MultiMutateLoop from bgp.functions.dimfunc import Dim, dless from bgp.preprocess import MagnitudeTransformer if __name__ == "__main__": import os os.chdir(r'../../Instances/Instance1_bandgap/band_gap') data = Call() all_import = data.csv().all_import name_and_abbr = data.csv().name_and_abbr store = Store() data_import = all_import data225_import = data_import cal = [] from sympy.physics.units import eV select = [ 'electronegativity(martynov&batsanov)', 'fusion enthalpy', 'valence electron number' ] select_unit = [dless, eV, dless] cal.append((select, select_unit))
import numpy as np from bgp.selection.exhaustion import Exhaustion from bgp.selection.quickmethod import dict_method_reg from mgetool.exports import Store from mgetool.imports import Call from mgetool.show import BasePlot from mgetool.tool import name_to_name from sklearn import utils from sklearn.model_selection import GridSearchCV warnings.filterwarnings("ignore") """ this is a description """ if __name__ == "__main__": store = Store( r'C:\Users\Administrator\Desktop\band_gap_exp\3.sum\method', ) data = Call(r'C:\Users\Administrator\Desktop\band_gap_exp') data_import = data.csv().all_import name_init, abbr_init = data.pickle_pd().name_and_abbr select = [ 'cell volume', 'electron density', 'lattice constants a', 'lattice constants c', 'radii covalent', 'radii ionic(shannon)', 'distance core electron(schubert)', 'latent heat of fusion', 'energy cohesive brewer', 'total energy', 'charge nuclear effective(slater)', 'valence electron number', 'electronegativity(martynov&batsanov)', 'volume atomic(villars,daams)' ] select = [ 'cell volume',
import warnings import matplotlib.pyplot as plt import numpy as np import sklearn from bgp.selection.backforward import BackForward from mgetool.exports import Store from mgetool.imports import Call from sklearn import svm from sklearn.model_selection import GridSearchCV, LeaveOneOut warnings.filterwarnings("ignore") # 数据导入 store = Store(r'/data/home/wangchangxin/data/zlj/') data = Call(r'/data/home/wangchangxin/data/zlj/', index_col=None) all_import = data.xlsx().data x_name = all_import.index.values y = all_import["y"].values x_frame = all_import.drop("y", axis=1) x = x_frame.values # # 预处理 # minmax = MinMaxScaler() # x = minmax.fit_transform(x) # 数据划分 xtrain, xtest = x[3:], x[:3] ytrain, ytest = y[3:], y[:3] xtrain, ytrain = sklearn.utils.shuffle(xtrain, ytrain, random_state=3)
import numpy as np import pandas as pd from featurebox.featurizers.compositionfeaturizer import DepartElementFeaturizer from mgetool.exports import Store from pymatgen import Composition """ this is a description """ if __name__ == "__main__": import os os.chdir(r'band_gap') store = Store() com_data = pd.read_excel(r'initial_band_gap_data.xlsx') # # """for element site""" from featurebox.data.impot_element_table import element_table name_and_abbr = element_table.iloc[[0, 1], :] element_table = element_table.iloc[2:, :] feature_select = [ 'lattice constants a', 'lattice constants b', 'lattice constants c', 'atomic radii(empirical)', 'atomic radii(clementi)',
population, logbook = eaSimple(pop, toolbox, cxpb=cxpb, mutpb=mutpb, ngen=ngen, stats=stats, halloffame=hof, pset=pset, store=store) return hof if __name__ == '__main__': # 输入 store = Store(r'D:\sy') data = Call(r'D:\sy') data_import = data.xlsx().featuredata name_abbr = data_import.columns.values x_name = name_abbr[:-1] # data_import = data_import.iloc[np.where(data_import['f1'] <= 1)[0]] X_frame = data_import[x_name] y_frame = data_import['y'] X = X_frame.values y = y_frame.values # 处理 # scal = preprocessing.MinMaxScaler()
def eaSimple(population, toolbox, cxpb, mutpb, ngen, stats=None,
             halloffame=None, verbose=__debug__, pset=None, store=True):
    """Run an evolutionary loop with three elite pools and per-individual
    archiving of every generation's expressions.

    Each generation: score the population in parallel, keep elites from the
    target-dimension, dimensionless and overall-best selectors, optionally
    record every individual's score/expression into ``data_all``, update the
    hall of fame, log statistics, then build the next generation by
    selection + crossover/mutation. Stops early once the best hall-of-fame
    fitness reaches 0.95.

    Parameters
    ----------
    population : list
        Initial population (mutated in place).
    toolbox
        Toolbox providing ``parallel``, ``select_kbest_target_dim``,
        ``select_kbest_dimless``, ``select_kbest`` and ``select_gs``.
    cxpb : float
        Crossover probability (passed to ``varAnd``).
    mutpb : float
        Mutation probability (passed to ``varAnd``).
    ngen : int
        Number of generations.
    stats
        Statistics object with ``fields`` and ``compile``; optional.
    halloffame
        Hall of fame to update; optional.
    verbose : bool
        Print the logbook stream each generation.
    pset
        Primitive set used to back-substitute real feature names; required
        when ``store`` is truthy.
    store : bool
        When truthy, archive every individual per generation.
        NOTE(review): the final write rebinds ``store`` to a ``Store()``
        instance and always runs, regardless of this flag — confirm intent.

    Returns
    -------
    tuple
        ``(population, logbook)``.
    """
    # Save/restore the RNG state around each phase so that the random
    # stream consumed by one phase does not perturb the next.
    rst = random.getstate()
    len_pop = len(population)
    logbook = Logbook()
    logbook.header = [] + (stats.fields if stats else [])
    data_all = {}
    random.setstate(rst)
    for gen in range(1, ngen + 1):
        "评价"  # (Chinese: "evaluate") — section marker, no-op statement
        rst = random.getstate()
        """score"""
        invalid_ind = [ind for ind in population if not ind.fitness.valid]
        # NOTE(review): the whole population is evaluated but results are
        # zipped against invalid_ind only — pairing is misaligned if any
        # individual already has a valid fitness. Confirm toolbox.parallel.
        fitnesses = toolbox.parallel(iterable=population)
        for ind, fit, in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit[0],
            ind.expr = fit[1]
            ind.dim = fit[2]
            ind.withdim = fit[3]
        random.setstate(rst)
        rst = random.getstate()
        """elite"""
        add_ind = []
        # Three elite pools: best 1% matching the target dimension, best 1%
        # dimensionless, and the overall top 5.
        add_ind1 = toolbox.select_kbest_target_dim(population,
                                                   K_best=0.01 * len_pop)
        add_ind2 = toolbox.select_kbest_dimless(population,
                                                K_best=0.01 * len_pop)
        add_ind3 = toolbox.select_kbest(population, K_best=5)
        add_ind += add_ind1
        add_ind += add_ind2
        add_ind += add_ind3
        elite_size = len(add_ind)
        random.setstate(rst)
        rst = random.getstate()
        """score"""
        if store:
            # Substitute internal rep names with real feature names before
            # archiving every individual of this generation.
            subp = functools.partial(sub, subed=pset.rep_name_list,
                                     subs=pset.real_name_list)
            data = {
                "gen{}_pop{}".format(gen, n): {
                    "gen": gen,
                    "pop": n,
                    "score": i.fitness.values[0],
                    "expr": str(subp(i.expr)),
                }
                for n, i in enumerate(population) if i is not None
            }
            data_all.update(data)
        random.setstate(rst)
        rst = random.getstate()
        """record"""
        if halloffame is not None:
            halloffame.update(add_ind3)
            # Early stop once the best individual is good enough.
            if len(halloffame.items
                   ) > 0 and halloffame.items[-1].fitness.values[0] >= 0.95:
                print(halloffame.items[-1])
                print(halloffame.items[-1].fitness.values[0])
                break
        random.setstate(rst)
        rst = random.getstate()
        """Dynamic output"""
        record = stats.compile(population) if stats else {}
        logbook.record(gen=gen, pop=len(population), **record)
        if verbose:
            print(logbook.stream)
        random.setstate(rst)
        """crossover, mutate"""
        # Elites are re-added below, so select only len_pop - elite_size here.
        offspring = toolbox.select_gs(population, len_pop - elite_size)
        # Vary the pool of individuals
        offspring = varAnd(offspring, toolbox, cxpb, mutpb)
        rst = random.getstate()
        """re-run"""
        offspring.extend(add_ind)
        population[:] = offspring
        random.setstate(rst)
    # NOTE(review): rebinds the `store` parameter; write is unconditional.
    store = Store()
    store.to_csv(data_all)
    return population, logbook
# x1 = x[:, 1] # x2 = x[:, 2] # x3 = x[:, 3] # # t = expr01 # func0 = sympy.utilities.lambdify(terminals, t) # re = func0(*x.T) # p = BasePlot(font=None) # p.scatter(y, re, strx='Experimental $E_{gap}$', stry='Calculated $E_{gap}$') # import matplotlib.pyplot as plt # # plt.show() if __name__ == '__main__': store = Store(r'C:\Users\Administrator\Desktop\band_gap_exp\4.symbol') data = Call(r'C:\Users\Administrator\Desktop\c', index_col=None) data_import = data.xlsx().sr X = data_import["delt_x"].values input_x = data_import[["delt_x", "G"]].values Pexp = data_import["Pexp"].values Pmix = data_import["Pmix"].values G = data_import["G"].values y = data_import["PG_y"].values y = y * G testfunc = input_x[:, 0] * input_x[:, 1] t = np.corrcoef(y, input_x[:, 0] * input_x[:, 1])
# @FileName: 4.symbollearing.py # @Software: PyCharm """ """ import numpy as np import pandas as pd import sympy from bgp.combination.symbolbase import calculateExpr, getName from mgetool.exports import Store from mgetool.imports import Call if __name__ == "__main__": store = Store( r'C:\Users\Administrator\Desktop\band_gap_exp_last\4.symbollearning') data = Call( r'C:\Users\Administrator\Desktop\band_gap_exp_last\1.generate_data', r'C:\Users\Administrator\Desktop\band_gap_exp_last\3.MMGS', r'C:\Users\Administrator\Desktop\band_gap_exp_last\2.correction_analysis' ) all_import_structure = data.csv.all_import_structure data_import = all_import_structure data216_import = data_import.iloc[np.where( data_import['group_number'] == 216)[0]] data225_import = data_import.iloc[np.where( data_import['group_number'] == 225)[0]] data221_import = data_import.iloc[np.where( data_import['group_number'] == 221)[0]] data216_225_221import = pd.concat(
# @License: BSD 3-Clause """ this is a description """ import numpy as np import pandas as pd from bgp.selection.corr import Corr from mgetool.exports import Store from mgetool.imports import Call from mgetool.tool import name_to_name # import seaborn as sns if __name__ == "__main__": store = Store(r'C:\Users\Administrator\Desktop\band_gap_exp\2.corr') data = Call(r'C:\Users\Administrator\Desktop\band_gap_exp') all_import = data.csv().all_import name_init, abbr_init = data.pickle_pd().name_and_abbr data_import = all_import data225_import = data_import.iloc[np.where( data_import['group_number'] == 225)[0]] X_frame = data225_import.drop(['exp_gap', 'group_number'], axis=1) y_frame = data225_import['exp_gap'] X = X_frame.values y = y_frame.values """calculate corr""" corr = Corr(threshold=0.90, muti_grade=2, muti_index=[2, len(X)]) corr.fit(X_frame)
from mgetool.exports import Store from mgetool.imports import Call from sklearn.utils import shuffle from bgp.functions.dimfunc import Dim, dless from bgp.preprocess import MagnitudeTransformer if __name__ == "__main__": import os os.chdir(r'band_gap') data = Call() all_import = data.csv().all_import name_and_abbr = data.csv().name_and_abbr store = Store() data_import = all_import data225_import = data_import select = [ 'cell volume', 'electron density', 'lattice constants a', 'lattice constants c', 'covalent radii', 'ionic radii(shannon)', 'core electron distance(schubert)', 'fusion enthalpy', 'cohesive energy(Brewer)', 'total energy', 'effective nuclear charge(slater)', "electron number", 'valence electron number', 'electronegativity(martynov&batsanov)', 'atomic volume(villars,daams)' ] from sympy.physics.units import eV, pm, nm
# [-1, ["exp", "log","Rec", "Self"]], # ], # definate_variable=[ # [-3, [0]], # [-2, [1]], # [-1, [2]]], # operate_linkage=[[-1, -2], ], # # variable_linkage = None # ) # # result = mainPart(X, y, pset1, pop_n=500, random_seed=2, cxpb=0.8, mutpb=0.1, ngen=20, # inter_add=True, iner_add=False, random_add=False, score=[explained_variance_score, r2_score]) # ret = result[2][1] if __name__ == "__main__": store = Store(r'C:\Users\Administrator\Desktop\band_gap_exp_last\4.symbollearning') data_cluster = Call(r'C:\Users\Administrator\Desktop\band_gap_exp_last\1.generate_data', r'C:\Users\Administrator\Desktop\band_gap_exp_last\3.MMGS') all_import_structure = data_cluster.csv.all_import_structure data_import = all_import_structure select_gs = ['destiny', 'energy cohesive brewer', 'distance core electron(schubert)'] select_gs = ['destiny'] + [j + "_%i" % i for j in select_gs[1:] for i in range(2)] data216_import = data_import.iloc[np.where(data_import['group_number'] == 216)[0]] data225_import = data_import.iloc[np.where(data_import['group_number'] == 225)[0]] data216_225_import = pd.concat((data216_import, data225_import)) X_frame = data225_import[select_gs] y_frame = data225_import['exp_gap']
# print("r:{},error:{},r2:{},MAE:{},r2_cv:{}".format(r, error, r2, mae, r2_cv[0])) # # # st.end() # st.to_csv(data, file_new_name="top_n") # if __name__ == "__main__": pa_factor, pa_dim = Dim.convert_to(10 * 6 * pa) ###########第三个########### """数据""" com_data = pd.read_csv(r'reg3.csv') x = com_data.iloc[:, :-1].values y = com_data.iloc[:, -1].values x, y = shuffle(x, y, random_state=0) st = Store("reg3_result_error") st.start() sl = SymbolLearning(loop=r'MultiMutateLoop', cal_dim=True, dim_type=None, pop=2000, gen=30, add_coef=True, re_hall=2, scoring=[ error, ], score_pen=[ -1, ], inter_add=False,