def setUp(self):
    """Build the fixture shared by the tests: data, symbol set and scorer.

    Stores the raw feature matrix and target on ``self.x`` / ``self.y``,
    a populated ``SymbolSet`` on ``self.pset`` and a
    ``CalculatePrecisionSet`` built from it on ``self.cp``.
    """
    from sklearn.datasets import load_boston
    from sklearn.metrics import mean_squared_error, r2_score

    self.SymbolTree = SymbolTree

    # NOTE(review): load_boston is no longer shipped by recent scikit-learn
    # releases -- confirm the version this suite is pinned to.
    boston = load_boston()
    self.x = boston["data"]
    self.y = boston["target"]

    # Features (with two index groups), three dimensionless constants and
    # the operator vocabulary.
    symbols = SymbolSet()
    self.pset = symbols
    symbols.add_features(self.x, self.y, x_group=[[1, 2], [4, 5]])
    symbols.add_constants([6, 3, 4], c_dim=[dless] * 3, c_prob=None)
    symbols.add_operations(
        power_categories=(2, 3, 0.5),
        categories=("Add", "Mul", "Self", "Abs"),
        self_categories=None,
    )

    # Scorer: r2_score is paired with penalty 1, mean_squared_error with -1.
    self.cp = CalculatePrecisionSet(
        symbols,
        scoring=[r2_score, mean_squared_error],
        score_pen=[1, -1],
        filter_warning=True,
    )
def fit(self, X=None, y=None, c=None, x_group=None, pset=None):
    """
    Run the evolution loop and store the best expression that was found.

    If ``pset`` is None, one simple SymbolSet is generated from ``X``, ``y``
    and ``c`` with no dimension calculation (every dim is set to 1), using
    only ``x_group``. If more self-definition is needed, pass one
    pre-defined SymbolSet object as ``pset``.

    Example::

        pset = SymbolSet()
        pset.add_features_and_constants(...)
        pset.add_operations(...)
        ...
        ...SymbolLearning().fit(pset=pset)

    Parameters
    ----------
    X : np.ndarray
        Feature data; only used when ``pset`` is None.
    y : np.ndarray
        Target data; only used when ``pset`` is None.
    c : list of float
        Constants; only used when ``pset`` is None.
    x_group : list of list
        Group of x; only used when ``pset`` is None.
        See Also pset.add_features_and_constants
    pset : SymbolSet
        Ready-made symbol set. See Also SymbolSet

    Notes
    -----
    Sets ``self.loop`` (the loop instance that was run), ``self.best_one``,
    ``self.expr``, ``self.expr_type`` (``"single"`` or ``"group"``),
    ``self.y_dim`` and ``self.fitness``.

    ``fit`` can be called repeatedly: the loop class/factory originally held
    by ``self.loop`` is remembered in ``self._loop_factory``, so a refit
    builds a fresh loop instead of calling the previous loop instance.
    """
    if pset is None:
        pset = SymbolSet()
        pset.add_features_and_constants(X, y, c, x_dim=1, y_dim=1, c_dim=1,
                                        x_prob=None, c_prob=None,
                                        x_group=x_group, feature_name=None)
        pset.add_operations(power_categories=(2, 3, 0.5),
                            categories=("Add", "Mul", "Sub", "Div"))

    # ``self.loop`` starts out as the loop class/factory and is replaced by
    # the built instance below.  Remember the factory so a second ``fit``
    # does not try to call the old instance; if the caller assigned a new
    # loop since the last fit, that new value is used as the factory.
    fitted_loop = getattr(self, "_fitted_loop", None)
    if fitted_loop is None or self.loop is not fitted_loop:
        self._loop_factory = self.loop
    self._fitted_loop = self._loop_factory(pset, *self.args, **self.kwargs)
    self.loop = self._fitted_loop

    hall = self.loop.run()
    self.best_one = hall.items[0]

    try:
        expr = general_expr(self.best_one.coef_expr, self.loop.cpset)
    except (RecursionError, RuntimeWarning):
        # Fall back to the un-expanded coefficient expression.
        expr = self.best_one.coef_expr
        self.expr_type = "group"
    else:
        self.expr_type = "single"

    self.expr = expr
    self.y_dim = self.best_one.y_dim
    self.fitness = self.best_one.fitness.values[0]
class MyTestbase(unittest.TestCase):
    """Smoke tests for SymbolSet, SymbolTree and CalculatePrecisionSet."""

    def setUp(self):
        """Create the data, the symbol set and the scorer used by every test."""
        from sklearn.datasets import load_boston
        from sklearn.metrics import mean_squared_error, r2_score

        self.SymbolTree = SymbolTree

        # NOTE(review): load_boston is no longer shipped by recent
        # scikit-learn releases -- confirm the pinned version.
        boston = load_boston()
        self.x = boston["data"]
        self.y = boston["target"]

        symbols = SymbolSet()
        self.pset = symbols
        symbols.add_features(self.x, self.y, x_group=[[1, 2], [4, 5]])
        symbols.add_constants([6, 3, 4], c_dim=[dless] * 3, c_prob=None)
        symbols.add_operations(
            power_categories=(2, 3, 0.5),
            categories=("Add", "Mul", "Self", "Abs"),
            self_categories=None,
        )

        self.cp = CalculatePrecisionSet(
            symbols,
            scoring=[r2_score, mean_squared_error],
            score_pen=[1, -1],
            filter_warning=True,
        )

    def test_pset_passed_to_cpset_will_change(self):
        """A second CalculatePrecisionSet on the same pset is not equal to the fixture's."""
        self.assertNotEqual(CalculatePrecisionSet(self.pset), self.cp)

    def test_tree_gengrow_repr_and_str_different(self):
        """Generate one seeded tree and print it (no assertion is active)."""
        from numpy import random as np_random

        np_random.seed(1)
        tree = SymbolTree.genGrow(self.pset, 3, 4)
        print(tree)

    def test_add_tree_back(self):
        """A generated tree can be handed back to the symbol set as a feature."""
        from numpy import random as np_random

        np_random.seed(1)
        tree = SymbolTree.genGrow(self.pset, 3, 4)
        self.pset.add_tree_to_features(tree)

    def test_barch_tree(self):
        """calculate_detail fills y_dim and expr, and leaves p_name as None."""
        from numpy import random as np_random

        np_random.seed(1)
        for _ in range(10):
            tree = SymbolTree.genGrow(self.pset, 3, 4)
            detail = self.cp.calculate_detail(tree)

            self.assertIsNotNone(detail.y_dim)
            self.assertIsNotNone(detail.expr)
            self.assertIsNone(detail.p_name)

            if detail.pre_y is None:
                continue

            # Only check and print when a prediction exists.
            self.assertIsInstance(detail.pre_y, numpy.ndarray)
            self.assertEqual(detail.pre_y.shape, self.y.shape)
            print(detail.coef_pre_y[:3])
            print(detail.pre_y[:3])
            print(detail.coef_score)
            print(detail.coef_expr)
            print(detail.pure_expr)

    def test_depart_tree(self):
        """Every piece returned by depart() can go through calculate_simple."""
        from numpy import random as np_random

        np_random.seed(1)
        for _ in range(10):
            tree = SymbolTree.genGrow(self.pset, 5, 6)
            for piece in tree.depart():
                simple = self.cp.calculate_simple(piece)
                self.assertIsNotNone(simple.y_dim)
                self.assertIsNotNone(simple.expr)
                self.assertIsNone(simple.p_name)
c = [6, 3, 4]

# Units: kg for each of the 13 feature columns and for the target;
# the three constants are dimensionless.
from sympy.physics.units import kg

x_u = [kg] * 13
y_u = kg
c_u = [dless] * 3

# Convert the values and obtain the matching dimensions.
x, x_dim = Dim.convert_x(x, x_u, target_units=None, unit_system="SI")
y, y_dim = Dim.convert_xi(y, y_u)
c, c_dim = Dim.convert_x(c, c_u)

t = time.time()

# Symbol set: grouped features, constants and the operator vocabulary.
pset0 = SymbolSet()
pset0.add_features(
    x,
    y,
    x_dim=x_dim,
    y_dim=y_dim,
    x_group=[[1, 2], [3, 4, 5]],
)
pset0.add_constants(c, c_dim=c_dim, c_prob=None)
pset0.add_operations(
    power_categories=(2, 3, 0.5),
    categories=("Add", "Mul", "Sub", "Div", "exp"),
    self_categories=None,
)

random.seed(0)

# Generate 100 trees, then compile each one against the symbol-set context.
z = time.time()
sl = [SymbolTree.genGrow(pset0, 3, 4) for _ in range(100)]
a = time.time()
sl = [
    compile_context(tree, pset0.context, pset0.gro_ter_con)
    for tree in sl
]
# Constants and their units.
# NOTE(review): c holds 3 values while c_u lists 9 units -- confirm how
# Dim.convert_x pairs values with units.
c = [1, 5.290 * 10**-11, 1.74]
c_u = [elementary_charge, m] + [dless] * 7

# Preprocessing: unit conversion first, then magnitude rescaling.
x, x_dim = Dim.convert_x(x, x_u, target_units=None, unit_system="SI")
y, y_dim = Dim.convert_xi(y, y_u)
c, c_dim = Dim.convert_x(c, c_u)

scal = MagnitudeTransformer(tolerate=1)
x, y = scal.fit_transform_all(x, y, group=2)
c = scal.fit_transform_constant(c)

# Symbol set.
pset0 = SymbolSet()

# Pair the feature column indices two by two and leave out the first pair.
x_g = list(np.arange(x.shape[1]).reshape(-1, 2)[1:])

pset0.add_features(
    x,
    y,
    x_dim=x_dim,
    y_dim=y_dim,
    x_group=x_g,
    feature_name=fea_name,
)
pset0.add_constants(c, c_dim=c_dim, c_prob=0.05)
pset0.add_operations(
    power_categories=(2, 3, 0.5),
    categories=("Add", "Mul", "Sub", "Div", "exp", "ln"),
    self_categories=None,
)

height = 2
class MyTestgp(unittest.TestCase):
    """Runs a short genetic-programming loop on top of CalculatePrecisionSet."""

    def setUp(self):
        """Create the data, the symbol set and the scorer used by the GP test."""
        from sklearn.datasets import load_boston
        from sklearn.metrics import mean_squared_error, r2_score

        self.SymbolTree = SymbolTree

        # NOTE(review): load_boston is no longer shipped by recent
        # scikit-learn releases -- confirm the pinned version.
        boston = load_boston()
        self.x = boston["data"]
        self.y = boston["target"]

        symbols = SymbolSet()
        self.pset = symbols
        symbols.add_features(self.x, self.y, x_group=[[1, 2], [4, 5]])
        symbols.add_constants([6, 3, 4], c_dim=[dless] * 3, c_prob=None)
        symbols.add_operations(
            power_categories=(2, 3, 0.5),
            categories=("Add", "Mul", "Neg", "Abs"),
            self_categories=None,
        )

        self.cp = CalculatePrecisionSet(
            symbols,
            scoring=[r2_score, mean_squared_error],
            score_pen=[1, -1],
            dim_type=None,
            filter_warning=True,
        )

    def test_gp_flow(self):
        """Score, select and vary a population of 10 trees for 5 rounds."""
        from numpy import random as np_random

        np_random.seed(1)
        cpset = self.cp

        # Tree type carrying a two-objective fitness.
        from deap.base import Fitness
        from featurebox.tools import newclass

        Fitness_ = newclass.create("Fitness_", Fitness, weights=(1, -1))
        PTree_ = newclass.create("PTrees_", SymbolTree, fitness=Fitness_)

        # Operators.  Alternatives that were tried and left disabled:
        #   select: selTournament(tournsize=3)
        #   mutate: mutUniform(expr=toolbox.generate, pset=cpset),
        #           mutNodeReplacement(pset=cpset)
        toolbox = Toolbox()
        toolbox.register("select", selKbestDim, dim_type=dless)
        toolbox.register("mate", cxOnePoint)
        toolbox.register("generate", genGrow, pset=cpset, min_=2, max_=3)
        toolbox.register("mutate", mutShrink, pset=cpset)

        # Limit the height of trees produced by crossover and mutation.
        for operator_name in ("mate", "mutate"):
            toolbox.decorate(
                operator_name,
                staticLimit(key=operator.attrgetter("height"), max_value=10),
            )

        population = [PTree_.genGrow(cpset, 3, 4) for _ in range(10)]

        for _ in range(5):
            # Score only the individuals whose fitness is not valid yet.
            invalid_ind = [
                tree for tree in population if not tree.fitness.valid
            ]
            invalid_ind_score = cpset.parallelize_score(inds=invalid_ind)
            for tree, result in zip(invalid_ind, invalid_ind_score):
                tree.fitness.values = result[0]
                tree.y_dim = result[1]

            population = toolbox.select(population, len(population))
            population[:] = varAnd(population, toolbox, 1, 1)
c = [6, 3, 4]

# Units: kg for each of the 13 feature columns and for the target;
# the three constants are dimensionless.
from sympy.physics.units import kg

x_u = [kg] * 13
y_u = kg
c_u = [dless] * 3

# Convert the values and obtain the matching dimensions.
x, x_dim = Dim.convert_x(x, x_u, target_units=None, unit_system="SI")
y, y_dim = Dim.convert_xi(y, y_u)
c, c_dim = Dim.convert_x(c, c_u)

t = time.time()

# Symbol set: grouped and explicitly named features, constants and the
# operator vocabulary.
pset0 = SymbolSet()
pset0.add_features(
    x,
    y,
    x_dim=x_dim,
    y_dim=y_dim,
    x_group=[[1, 2], [3, 4, 5], [6, 7]],
    feature_name=["Ss%i" % idx for idx in range(13)],
)
pset0.add_constants(c, c_dim=c_dim, c_prob=None)
pset0.add_operations(
    power_categories=(2, 3, 0.5),
    categories=("Add", "Mul", "Sub", "Div", "ln"),
    self_categories=None,
)

random.seed(2)

# Generate 500 trees; z and a record the time before and after.
z = time.time()
sl = [SymbolTree.genGrow(pset0, 3, 4) for _ in range(500)]
a = time.time()
import numpy as np
import sympy

from featurebox.symbol.base import SymbolSet, SymbolTree
from featurebox.symbol.calculation.translate import general_expr_dict, compile_context

if __name__ == "__main__":
    # Toy data: a 10 x 4 feature matrix filled with 0.1 and a target of ones.
    x = np.full((10, 4), 0.1)
    y = np.ones(10)

    height = 2

    # GVP
    group = 2

    pset = SymbolSet()
    pset.add_features(x, y, x_group=group)

    # Register the accumulative operations, each with the same probability.
    acc_categories = ("MAdd", "MMul", "MSub", "MDiv", "Conv", "Self")
    pset.add_accumulative_operation(
        categories=acc_categories,
        special_prob={op_name: 0.16 for op_name in acc_categories},
    )