pset.add_features(x, y, x_group=group)
pset.add_accumulative_operation(
    categories=("MAdd", "MMul", "MSub", "MDiv", "Conv", "Self"),
    special_prob={"MAdd": 0.16, "MMul": 0.16, "MSub": 0.16,
                  "MDiv": 0.16, "Conv": 0.16, "Self": 0.16})
pset.add_operations(categories=("Add", "Mul", "Sub", "Div"))

# Collect the free symbols; grouped features come back as nested sequences.
s = pset.free_symbol[1]
ss = []
for si in s:
    if isinstance(si, sympy.Symbol):
        ss.append(si)
    else:
        ss.extend(si)

# Target expression built from the first four symbols.
target = (ss[0] + ss[1]) * (ss[2] - ss[3])
target = sympy.simplify(target)

# a = time.time()
random.seed(4)
population = [SymbolTree.genFull(pset, int(height - 1),
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)

store = Store()

# symbol set
pset0 = SymbolSet()
pset0.add_features(X_train, y_train)
pset0.add_constants(c=[1, ])
pset0.add_operations(power_categories=(2,),
                     categories=("Add", "exp", "Neg"))

h_bgp = 3
# stop = None
# This random_state was chosen on a Linux system; on other systems the best
# random_state may differ, so try different values.
stop = lambda ind: ind.fitness.values[0] >= 0.99

sl = SymbolLearning(loop='MultiMutateLoop', pset=pset0, gen=10, pop=3000, hall=1,
                    batch_size=40, re_hall=5, n_jobs=12,
def fit(self, X=None, y=None, c=None, x_group=None, x_dim=1, y_dim=1, c_dim=1,
        x_prob=None, c_prob=None, pset=None, power_categories=(2, 3, 0.5),
        categories=("Add", "Mul", "Sub", "Div"), warm_start=False, new_gen=None):
    """
    Method 1: fit with X, y.

    Examples::

        sl = SymbolLearning()
        sl.fit(x, y, ...)

    Method 2: fit with a customized pset. If more self-definition is needed,
    pass a pre-defined SymbolSet object as ``pset``.

    Examples::

        pset = SymbolSet()
        pset.add_features_and_constants(...)
        pset.add_operations(...)
        ...
        sl = SymbolLearning()
        sl.fit(pset=pset)

    Parameters
    ----------
    X: np.ndarray
        data.
    y: np.ndarray
        target.
    c: list of float, None
        constants.
    x_dim: 1 or list of Dim
        the same size as x.shape[1]; the default 1 means dimensionless for all x.
    y_dim: 1, Dim
        dim of y.
    c_dim: 1, list of Dim
        the same size as c; the default 1 means dimensionless for all c.
    x_prob: None, list of float
        the same size as x.shape[1].
    c_prob: None, list of float
        the same size as c.
    x_group: list of list
        group of x. Examples: x_group=[[1, 2], ] or x_group=2.
        See Also :py:func:`bgp.base.SymbolSet.add_features`
    power_categories: Sized, tuple, None
        Examples: (0.5, 2, 3)
    categories: tuple of str
        map table:

            {"Add": sympy.Add, "Sub": Sub, "Mul": sympy.Mul, "Div": Div}

            {"sin": sympy.sin, "cos": sympy.cos, "exp": sympy.exp, "ln": sympy.ln}

            {"Abs": sympy.Abs, "Neg": functools.partial(sympy.Mul, -1.0),
             "Rec": functools.partial(sympy.Pow, e=-1.0)}

        Others:

            "Rem": f(x) = 1 - x, if x is true

            "Self": f(x) = x, if x is true
    pset: SymbolSet
        See Also SymbolSet.
    warm_start: bool
        warm start or not.

        Note: if a pset is offered in advance by the user, please check the feature
        numbers carefully, especially when using ``re_Tree``, because new features
        are added. Reference: CalculatePrecisionSet.update_with_X_y.
    new_gen: None, int
        warm-start generation.
    """
    # try to find a pset in args/kwargs
    psets = [i for i in self.args if isinstance(i, SymbolSet)]
    if len(psets) > 0:
        self.args.remove(psets[0])
    if "pset" in self.kwargs:
        psets.append(self.kwargs["pset"])
        del self.kwargs["pset"]
    if pset is None:
        if len(psets) > 0:
            pset = psets[0]

    if pset is None:
        # a simple pset is generated with no dimension calculation, just with x_group
        if X is not None and y is not None:
            pset = SymbolSet()
            pset.add_features_and_constants(X, y, c, x_dim=x_dim, y_dim=y_dim,
                                            c_dim=c_dim, x_prob=x_prob, c_prob=c_prob,
                                            x_group=x_group, feature_name=None)
            pset.add_operations(power_categories=power_categories,
                                categories=categories)
        elif hasattr(self.loop, "gen"):
            pass
        else:
            raise ValueError("The pset should be defined or the X and y should be offered.")

    ####################################
    if warm_start:
        assert hasattr(self.loop, "gen"), "Before using warm_start, fit at least one time."
        if X is not None and y is not None:
            self.loop.cpset.update_with_X_y(X, y)
        elif pset:
            # warm_start is not compatible with "re_Tree"
            self.loop.cpset.update(pset)
        else:
            raise ValueError("The pset should be defined or the X and y should be offered.")
        self.loop.re_fresh_by_name()
        hall = self.loop.run(warm_start=True, new_gen=new_gen)
    else:
        if hasattr(self.loop, "gen"):
            loops = self.loop.__class__
            self.loop = loops(pset, *self.args, **self.kwargs)
        else:
            self.loop = self.loop(pset, *self.args, **self.kwargs)
        hall = self.loop.run()

    self.best_one = hall.items[0]
    try:
        expr = general_expr(self.best_one.coef_expr, self.loop.cpset)
        self.expr_type = "single"
    except (RecursionError, RuntimeWarning):
        expr = self.best_one.coef_expr
        self.expr_type = "group"
    self.expr = expr
    self.y_dim = self.best_one.y_dim
    self.fitness = self.best_one.fitness.values[0]
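# A minimal usage sketch of the two calling styles described in the docstring above.
# It is illustrative only: `X` and `y` are assumed to be numpy arrays already loaded,
# and the keyword values are arbitrary examples, not recommended settings.

# Method 1: let fit() build a default SymbolSet from X and y.
sl = SymbolLearning()
sl.fit(X, y, power_categories=(2, 3, 0.5), categories=("Add", "Mul", "Sub", "Div"))
print(sl.expr, sl.fitness)

# Method 2: pass a customized SymbolSet through ``pset``.
pset = SymbolSet()
pset.add_features_and_constants(X, y, None)  # no constants in this sketch
pset.add_operations(power_categories=(2,), categories=("Add", "Mul"))
sl = SymbolLearning()
sl.fit(pset=pset)
print(sl.expr, sl.fitness)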
x_u = [kg] * 13
y_u = kg
c_u = [dless, dless, dless]

x, x_dim = Dim.convert_x(x, x_u, target_units=None, unit_system="SI")
y, y_dim = Dim.convert_xi(y, y_u)
c, c_dim = Dim.convert_x(c, c_u)

t = time.time()

# symbol set
pset0 = SymbolSet()
pset0.add_features(x, y, x_dim=x_dim, y_dim=y_dim, x_group=[[1, 2], [3, 4, 5]])
pset0.add_constants(c, c_dim=c_dim, c_prob=None)
pset0.add_operations(power_categories=(2, 3, 0.5),
                     categories=("Add", "Mul", "Sub", "Div", "exp"),
                     self_categories=None)

random.seed(0)

# time tree generation and compilation separately
z = time.time()
sl = [SymbolTree.genGrow(pset0, 3, 4) for _ in range(100)]
a = time.time()
sl = [compile_context(sli, pset0.context, pset0.gro_ter_con) for sli in sl]
b = time.time()
print(b - a, a - z)
SL_data = data.SL_data
si_transformer = data.si_transformer

store = Store()

x, x_dim, y, y_dim, c, c_dim, X, Y = SL_data

# group the features (skipping the first column) in pairs for x_group
x_g = np.arange(x.shape[1])[1:]
x_g = x_g.reshape(-1, 2).tolist()

pset0 = SymbolSet()
pset0.add_features(x, y, x_dim=x_dim, y_dim=y_dim, x_group=x_g)
pset0.add_constants(c, c_dim=c_dim, c_prob=0.05)
pset0.add_operations(power_categories=(2, 3, 0.5, 1 / 3, 4, 1 / 4),
                     # categories=("Mul",),
                     categories=("Add", "Mul", "Sub", "Div", "exp", "ln"),
                     self_categories=None)

total_height = 3
h_bgp = 2

# This random_state was chosen on a Linux system; on other systems the best
# random_state may differ, so try different values.
for i in range(1, 10):
    stop = lambda ind: ind.fitness.values[0] >= 0.95
    sl = SymbolLearning(loop="MultiMutateLoop", pset=pset0, gen=20, pop=1000, hall=1,
                        batch_size=40, re_hall=3, n_jobs=12, mate_prob=0.9,
                        max_value=h_bgp, initial_min=2, initial_max=h_bgp,
                        mutate_prob=0.8, tq=False, dim_type="coef",
                        stop_condition=stop, re_Tree=0, store=False, random_state=4,
                        verbose=True,
                        # stats=None,
                        stats={"fitness_dim_max": ["max"], "dim_is_target": ["sum"],
                               "h_bgp": ["mean"]},
                        add_coef=True, inter_add=True, out_add=True, cal_dim=True,
                        vector_add=True,
x_u = [kg] * 13
y_u = kg
c_u = [dless, dless, dless]

x, x_dim = Dim.convert_x(x, x_u, target_units=None, unit_system="SI")
y, y_dim = Dim.convert_xi(y, y_u)
c, c_dim = Dim.convert_x(c, c_u)

z = time.time()

# symbol set
pset0 = SymbolSet()
pset0.add_features(x, y, x_dim=x_dim, y_dim=y_dim, x_group=[[1, 2], [3, 4], [5, 6]])
pset0.add_constants(c, c_dim=c_dim, c_prob=None)
pset0.add_operations(power_categories=(2, 3, 0.5),
                     categories=("Add", "Mul", "Sub", "Div", "exp", "Abs"))

# a = time.time()
bl = MultiMutateLoop(pset=pset0, gen=20, pop=2000, hall=2, batch_size=60, re_hall=2,
                     n_jobs=1, mate_prob=1, max_value=3, initial_max=1, initial_min=1,
                     mutate_prob=0.8, tq=True, dim_type="coef", re_Tree=None,
                     store=False, random_state=2,
                     stats={"fitness_dim_max": ["max"], "dim_is_target": ["sum"],
                            "h_bgp": ["max"]},
                     add_coef=True, cal_dim=False, inner_add=False, vector_add=True,
                     personal_map=False)
# b = time.time()

bl.run()
bl.run(warm_start=True)

# population = [bl.PTree(bl.genFull()) for _ in range(30)]
# pset = bl.cpset
# unittest.main()

import numpy as np

x = np.array([[10, 6, 3, 4, 5, 6, 7, 8, 9, 9, 10, 9, 7, 5, 3, 1],
              [1, 2, 3, 4, 4, 3, 2, 4, 5, 6, 7, 8, 9, 10, 12, 15],
              [2, 3, 4, 8, 12, 16, 30, 32, 33, 30, 20, 10, 5, 3, 2, 1]]).T
x[:, 2] = x[:, 0] / x[:, 1]
y = np.zeros(x.shape[0])

pset = SymbolSet()
pset.add_features(x, y)
pset.add_operations(categories=("Add", "Mul", "Self", "Abs"),
                    self_categories=None)

from sklearn.metrics import r2_score, mean_squared_error

cp = CalculatePrecisionSet(pset, scoring=[r2_score, mean_squared_error],
                           score_pen=[1, -1], filter_warning=True)

x0, x1, x2 = sympy.symbols("x0, x1, x2")

# t = Function("t")
# expr00 = (x2*x1 + x0*x2*2).subs(x0, t(x1))
# dv1 = sympy.diff(expr00, x1, evaluate=True)
# dv1 = dv1.subs(t(x1), x0)
# t = Function("t")
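# A small follow-up sketch (assumed usage, mirroring the unit tests later in this
# section): generate one tree from the same pset and score it through the
# CalculatePrecisionSet built above. The (min, max) depths and the attribute names
# on the returned individual follow those tests and are illustrative, not canonical.
tree = SymbolTree.genGrow(pset, 3, 4)
detail = cp.calculate_detail(tree)
print(detail.expr)
print(detail.y_dim)
if detail.pre_y is not None:
    print(detail.pre_y[:3])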
gpa_dim = Dim.convert_to_Dim(1e9 * pa, unit_system="SI")
j_d_mol_dim = Dim.convert_to_Dim(1000 * J / mol, unit_system="SI")
K_dim = Dim.convert_to_Dim(K, unit_system="SI")
kg_d_m3_dim = Dim.convert_to_Dim(kg / m**3, unit_system="SI")

# ignore the scaling factors
y_dim = dless
x_dim = [dless, gpa_dim[1], j_d_mol_dim[1], K_dim[1], dless, kg_d_m3_dim[1]]

# symbol set
pset0 = SymbolSet()
pset0.add_features(x, y, x_dim=x_dim, y_dim=y_dim)
pset0.add_operations(power_categories=(2, 3, 0.5),
                     categories=("Mul", "Div", "exp"))

# symbolic regression
# Option 1: the coefficient is added at the outermost layer.
# sl = SymbolLearning(loop="MultiMutateLoop", pop=100, gen=2, random_state=1, pset=pset0,
#                     classification=True, scoring=[metrics.accuracy_score, ], score_pen=[1, ],
#                     cal_dim=True, n_jobs=10,
#                     store=True
#                     )

# Option 2: the coefficient is added at the outermost layer, and it is assumed
# that the coefficient can complete the dimension automatically.
# pset0.y_dim = None
# sl = SymbolLearning(loop="MultiMutateLoop", pop=1000, gen=3, random_state=1, pset=pset0,
#                     classification=True, scoring=[metrics.accuracy_score, ], score_pen=[1, ],
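# A minimal sketch of "Option 1" above (an illustration, not the original script):
# `metrics` is assumed to be sklearn.metrics, and since pset0 already holds the
# features, the pset handed to the constructor is picked up by fit() directly
# (see the fit() implementation earlier in this section).
from sklearn import metrics

sl = SymbolLearning(loop="MultiMutateLoop", pop=100, gen=2, random_state=1, pset=pset0,
                    classification=True, scoring=[metrics.accuracy_score, ],
                    score_pen=[1, ], cal_dim=True, n_jobs=10, store=True)
sl.fit()
print(sl.expr, sl.fitness)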
X = np.concatenate((X, (X[:, 1] / X[:, 0]).reshape(-1, 1)), axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=3)

store = Store()

# symbol set
pset0 = SymbolSet()
pset0.add_features(X_train, y_train)
# pset0.add_constants(c=[1, ])
# pset0.add_operations(power_categories=(2, 0.5),
pset0.add_operations(
    # power_categories=(2,),
    categories=("exp", "Mul", "Sub"),
    self_categories=None)

h_bgp = 3
# stop = None
# This random_state was chosen on a Linux system; on other systems the best
# random_state may differ, so try different values.
stop = lambda ind: ind.fitness.values[0] >= 0.999

sl = SymbolLearning(loop='MultiMutateLoop', pset=pset0, gen=10, pop=3000, hall=1,
                    batch_size=40, re_hall=5,
class MyTestgp(unittest.TestCase):

    def setUp(self):
        self.SymbolTree = SymbolTree
        self.pset = SymbolSet()

        from sklearn.datasets import fetch_california_housing
        data = fetch_california_housing()
        x = data["data"][:100]
        y = data["target"][:100]
        self.x = x
        self.y = y

        # self.pset.add_features(x, y, )
        self.pset.add_features(x, y, x_group=[[1, 2], [4, 5]])
        self.pset.add_constants([6, 3, 4], c_dim=[dless, dless, dless], c_prob=None)
        self.pset.add_operations(power_categories=(2, 3, 0.5),
                                 categories=("Add", "Mul", "Neg", "Abs"),
                                 self_categories=None)

        from sklearn.metrics import r2_score, mean_squared_error
        self.cp = CalculatePrecisionSet(self.pset,
                                        scoring=[r2_score, mean_squared_error],
                                        score_pen=[1, -1],
                                        dim_type=None,
                                        filter_warning=True)

    def test_gp_flow(self):
        from numpy import random
        random.seed(1)
        cpset = self.cp

        # define the tree and fitness classes
        from deap.base import Fitness
        Fitness_ = newclass.create("Fitness_", Fitness, weights=(1, -1))
        PTree_ = newclass.create("PTrees_", SymbolTree, fitness=Fitness_)

        # define selection and mating
        toolbox = Toolbox()
        # toolbox.register("select", selTournament, tournsize=3)
        toolbox.register("select", selKbestDim, dim_type=dless)  # selBest
        toolbox.register("mate", cxOnePoint)

        # define mutation
        toolbox.register("generate", genGrow, pset=cpset, min_=2, max_=3)
        # toolbox.register("mutate", mutUniform, expr=toolbox.generate, pset=cpset)
        # toolbox.register("mutate", mutNodeReplacement, pset=cpset)
        toolbox.register("mutate", mutShrink, pset=cpset)
        toolbox.decorate("mate", staticLimit(key=operator.attrgetter("height"), max_value=10))
        toolbox.decorate("mutate", staticLimit(key=operator.attrgetter("height"), max_value=10))

        # define evaluation
        # toolbox.register("evaluate", cpset.parallelize_calculate, n_jobs=4, add_coef=True,
        #                  inter_add=False, inner_add=False)
        # toolbox.register("parallel", parallelize, n_jobs=1, func=toolbox.evaluate,
        #                  respective=False, tq=False)

        population = [PTree_.genGrow(cpset, 3, 4) for _ in range(10)]

        # si = sys.getsizeof(cpset)
        for i in range(5):
            # evaluate the individuals whose fitness is not yet valid
            invalid_ind = [ind for ind in population if not ind.fitness.valid]
            invalid_ind_score = cpset.parallelize_score(inds=invalid_ind)
            for ind, score in zip(invalid_ind, invalid_ind_score):
                ind.fitness.values = score[0]
                ind.y_dim = score[1]
            # si2 = sys.getsizeof(invalid_ind[0])
            # invalid_ind = [i.compress() for i in invalid_ind]
            # si3 = sys.getsizeof(invalid_ind[0])
            # print(si3, si2, si)

            # select and vary for the next generation
            population = toolbox.select(population, len(population))
            offspring = varAnd(population, toolbox, 1, 1)
            population[:] = offspring
class MyTestbase(unittest.TestCase):

    def setUp(self):
        self.SymbolTree = SymbolTree
        self.pset = SymbolSet()

        from sklearn.datasets import fetch_california_housing
        data = fetch_california_housing()
        x = data["data"][:100]
        y = data["target"][:100]
        # No = Normalizer()
        # y = y / max(y)
        # x = No.fit_transform(x)
        self.x = x
        self.y = y

        # self.pset.add_features(x, y, )
        self.pset.add_features(x, y, x_group=[[1, 2], [4, 5]])
        self.pset.add_constants([6, 3, 4], c_dim=[dless, dless, dless], c_prob=None)
        self.pset.add_operations(power_categories=(2, 3, 0.5),
                                 categories=("Add", "Mul", "Self", "Abs"),
                                 self_categories=None)

        from sklearn.metrics import r2_score, mean_squared_error
        self.cp = CalculatePrecisionSet(self.pset,
                                        scoring=[r2_score, mean_squared_error],
                                        score_pen=[1, -1],
                                        filter_warning=True)

    def test_pset_passed_to_cpset_will_change(self):
        cp = CalculatePrecisionSet(self.pset)
        self.assertNotEqual(cp, self.cp)

    def test_tree_gengrow_repr_and_str_different(self):
        from numpy import random
        random.seed(1)
        sl = SymbolTree.genGrow(self.pset, 3, 4)
        print(sl)
        # self.assertNotEqual(repr(sl), str(sl))

    def test_add_tree_back(self):
        from numpy import random
        random.seed(1)
        sl = SymbolTree.genGrow(self.pset, 3, 4)
        self.pset.add_tree_to_features(sl)

    def test_batch_tree(self):
        from numpy import random
        random.seed(1)
        for i in range(10):
            sl = SymbolTree.genGrow(self.pset, 3, 4)
            cpsl = self.cp.calculate_detail(sl)
            self.assertIsNotNone(cpsl.y_dim)
            self.assertIsNotNone(cpsl.expr)
            self.assertIsNone(cpsl.p_name)
            if cpsl.pre_y is not None:
                self.assertIsInstance(cpsl.pre_y, numpy.ndarray)
                self.assertEqual(cpsl.pre_y.shape, self.y.shape)
                print(cpsl.coef_pre_y[:3])
                print(cpsl.pre_y[:3])
                print(cpsl.coef_score)
                print(cpsl.coef_expr)
                print(cpsl.pure_expr)

    def test_depart_tree(self):
        from numpy import random
        random.seed(1)
        for i in range(10):
            sl = SymbolTree.genGrow(self.pset, 5, 6)
            sl_departs = sl.depart()
            for sub in sl_departs:
                cpsl = self.cp.calculate_simple(sub)
                self.assertIsNotNone(cpsl.y_dim)
                self.assertIsNotNone(cpsl.expr)
                self.assertIsNone(cpsl.p_name)