def setUp(self): self.SymbolTree = SymbolTree self.pset = SymbolSet() from sklearn.datasets import fetch_california_housing data = fetch_california_housing() x = data["data"][:100] y = data["target"][:100] # No = Normalizer() # y=y/max(y) # x = No.fit_transform(x) self.x = x self.y = y # self.pset.add_features(x, y, ) self.pset.add_features(x, y, x_group=[[1, 2], [4, 5]]) self.pset.add_constants([6, 3, 4], c_dim=[dless, dless, dless], c_prob=None) self.pset.add_operations(power_categories=(2, 3, 0.5), categories=("Add", "Mul", "Self", "Abs"), self_categories=None) from sklearn.metrics import r2_score, mean_squared_error self.cp = CalculatePrecisionSet(self.pset, scoring=[r2_score, mean_squared_error], score_pen=[1, -1], filter_warning=True)
def setUp(self): self.SymbolTree = SymbolTree self.pset = SymbolSet() from sklearn.datasets import load_boston data = load_boston() x = data["data"] y = data["target"] self.x = x self.y = y # self.pset.add_features(x, y, ) self.pset.add_features(x, y, x_group=[[1, 2], [4, 5]]) self.pset.add_constants([6, 3, 4], c_dim=[dless, dless, dless], c_prob=None) self.pset.add_operations(power_categories=(2, 3, 0.5), categories=("Add", "Mul", "Neg", "Abs"), self_categories=None) from sklearn.metrics import r2_score, mean_squared_error self.cp = CalculatePrecisionSet(self.pset, scoring=[r2_score, mean_squared_error], score_pen=[1, -1], dim_type=None, filter_warning=True)
def __init__(self, pset, pop=500, gen=20, mutate_prob=0.5, mate_prob=0.8, hall=1, re_hall=1, re_Tree=None, initial_min=None, initial_max=3, max_value=5, scoring=(r2_score,), score_pen=(1,), filter_warning=True, cv=1, add_coef=True, inter_add=True, inner_add=False, vector_add=False, out_add=False, flat_add=False, cal_dim=False, dim_type=None, fuzzy=False, n_jobs=1, batch_size=40, random_state=None, stats=None, verbose=True, migrate_prob=0, tq=True, store=False, personal_map=False, stop_condition=None, details=False, classification=False, score_object="y", sub_mu_max=1, limit_type="h_bgp", batch_para=False): """ Parameters ---------- pset:SymbolSet the feature x and target y and others should have been added. pop: int number of population. gen: int number of generation. mutate_prob:float probability of mutate. mate_prob:float probability of mate(crossover). initial_max:int max initial size of expression when first producing. initial_min : None,int min initial size of expression when first producing. max_value:int max size of expression. limit_type: "height" or "length",","h_bgp" limitation type for max_value, but don't affect initial_max, initial_min. hall:int,>=1 number of HallOfFame (elite) to maintain. re_hall:None or int>=2 Notes: only valid when hall number of HallOfFame to add to next generation. re_Tree: int number of new features to add to next generation. 0 is false to add. personal_map:bool or "auto" "auto" is using 'premap' and with auto refresh the 'premap' with individual.\n True is just using constant 'premap'.\n False is just use the prob of terminals. scoring: list of Callable, default is [sklearn.metrics.r2_score,] See Also ``sklearn.metrics`` score_pen: tuple of 1, -1 or float but 0. >0 : max problem, best is positive, worse -np.inf. <0 : min problem, best is negative, worse np.inf. Notes: if multiply score method, the scores must be turn to same dimension in prepossessing or weight by score_pen. Because the all the selection are stand on the mean(w_i*score_i) Examples:: scoring = [r2_score,] score_pen= [1,] cv:sklearn.model_selection._split._BaseKFold,int the shuffler must be False, default=1 means no cv. filter_warning:bool filter warning or not. add_coef:bool add coef in expression or not. inter_add:bool add intercept constant or not. inner_add:bool add inner coefficients or not. out_add:bool add out coefficients or not. flat_add:bool add flat coefficients or not. n_jobs:int default 1, advise 6. batch_size:int default 40, depend of machine. random_state:int None,int. cal_dim:bool escape the dim calculation. dim_type:Dim or None or list of Dim "coef": af(x)+b. a,b have dimension,f(x)'s dimension is not dnan. \n "integer": af(x)+b. f(x) is with integer dimension. \n [Dim1,Dim2]: f(x)'s dimension in list. \n Dim: f(x) ~= Dim. (see fuzzy) \n Dim: f(x) == Dim. \n None: f(x) == pset.y_dim fuzzy:bool choose the dim with same base with dim_type, such as m,m^2,m^3. stats:dict details of logbook to show. \n Map:\n values = {"max": np.max, "mean": np.mean, "min": np.mean, "std": np.std, "sum": np.sum} keys = {\n "fitness": just see fitness[0], \n "fitness_dim_max": max problem, see fitness with demand dim,\n "fitness_dim_min": min problem, see fitness with demand dim,\n "dim_is_target": demand dim,\n "coef": dim is True, coef have dim, \n "integer": dim is integer, \n ... } if stats is None, default is: for cal_dim=True: stats = {"fitness_dim_max": ("max",), "dim_is_target": ("sum",)} for cal_dim=False stats = {"fitness": ("max",)} if self-definition, the key is func to get attribute of each ind. Examples:: def func(ind): return ind.fitness[0] stats = {func: ("mean",), "dim_is_target": ("sum",)} verbose:bool print verbose logbook or not. tq:bool print progress bar or not. store:bool or path bool or path. stop_condition:callable stop condition on the best ind of hall, which return bool,the true means stop loop. Examples:: def func(ind): c = ind.fitness.values[0]>=0.90 return c details:bool return expr and predict_y or not. classification: bool classification or not. score_object: score by y or delta y (for implicit function). """ super(BaseLoop, self).__init__() assert initial_max <= max_value, "the initial size of expression should less than max_value limitation" if cal_dim: assert all( [isinstance(i, Dim) for i in pset.dim_ter_con.values()]), \ "all import dim of pset should be Dim object." self.details = details self.max_value = max_value self.pop = pop self.gen = gen self.mutate_prob = mutate_prob self.mate_prob = mate_prob self.migrate_prob = migrate_prob self.verbose = verbose self.cal_dim = cal_dim self.re_hall = re_hall self.re_Tree = re_Tree self.store = store self.limit_type = limit_type self.data_all = [] self.personal_map = personal_map self.stop_condition = stop_condition self.population = [] self.rand_state = None self.random_state = random_state self.sub_mu_max = sub_mu_max self.population_next = [] self.cpset = CalculatePrecisionSet(pset, scoring=scoring, score_pen=score_pen, filter_warning=filter_warning, cal_dim=cal_dim, add_coef=add_coef, inter_add=inter_add, inner_add=inner_add, vector_add=vector_add, out_add=out_add, flat_add=flat_add, cv=cv, n_jobs=n_jobs, batch_size=batch_size, tq=tq, fuzzy=fuzzy, dim_type=dim_type, details=details, classification=classification, score_object=score_object, batch_para=batch_para ) Fitness_ = newclass.create("Fitness_", Fitness, weights=score_pen) self.PTree = newclass.create("PTrees", SymbolTree, fitness=Fitness_) # def produce if initial_min is None: initial_min = 2 self.register("genGrow", genGrow, pset=self.cpset, min_=initial_min, max_=initial_max + 1, personal_map=self.personal_map) self.register("genFull", genFull, pset=self.cpset, min_=initial_min, max_=initial_max + 1, personal_map=self.personal_map) self.register("genHalf", genGrow, pset=self.cpset, min_=initial_min, max_=initial_max + 1, personal_map=self.personal_map) self.register("gen_mu", genGrow, min_=1, max_=self.sub_mu_max + 1, personal_map=self.personal_map) # def selection self.register("select", selTournament, tournsize=2) self.register("selKbestDim", selKbestDim, dim_type=self.cpset.dim_type, fuzzy=self.cpset.fuzzy) self.register("selBest", selBest) self.register("mate", cxOnePoint) # def mutate self.register("mutate", mutUniform, expr=self.gen_mu, pset=self.cpset) self.decorate("mate", staticLimit(key=operator.attrgetter(limit_type), max_value=self.max_value)) self.decorate("mutate", staticLimit(key=operator.attrgetter(limit_type), max_value=self.max_value)) if stats is None: if cal_dim: if score_pen[0] > 0: stats = {"fitness_dim_max": ("max",), "dim_is_target": ("sum",)} else: stats = {"fitness_dim_min": ("min",), "dim_is_target": ("sum",)} else: if score_pen[0] > 0: stats = {"fitness": ("max",)} else: stats = {"fitness": ("min",)} self.stats = Statis_func(stats=stats) logbook = Logbook() logbook.header = ['gen'] + (self.stats.fields if self.stats else []) self.logbook = logbook if hall is None: hall = 1 self.hall = HallOfFame(hall) if re_hall is None: self.re_hall = None else: if re_hall == 1 or re_hall == 0: print("re_hall should more than 1") re_hall = 2 assert re_hall >= hall, "re_hall should more than hall" self.re_hall = HallOfFame(re_hall)
class BaseLoop(Toolbox): """ Base loop for BGP. Examples:: if __name__ == "__main__": pset = SymbolSet() stop = lambda ind: ind.fitness.values[0] >= 0.880963 bl = BaseLoop(pset=pset, gen=10, pop=1000, hall=1, batch_size=40, re_hall=3, \n n_jobs=12, mate_prob=0.9, max_value=5, initial_min=1, initial_max=2, \n mutate_prob=0.8, tq=True, dim_type="coef", stop_condition=stop,\n re_Tree=0, store=False, random_state=1, verbose=True,\n stats={"fitness_dim_max": ["max"], "dim_is_target": ["sum"]},\n add_coef=True, inter_add=True, inner_add=False, cal_dim=True, vector_add=False,\n personal_map=False) bl.run() """ def __init__(self, pset, pop=500, gen=20, mutate_prob=0.5, mate_prob=0.8, hall=1, re_hall=1, re_Tree=None, initial_min=None, initial_max=3, max_value=5, scoring=(r2_score,), score_pen=(1,), filter_warning=True, cv=1, add_coef=True, inter_add=True, inner_add=False, vector_add=False, out_add=False, flat_add=False, cal_dim=False, dim_type=None, fuzzy=False, n_jobs=1, batch_size=40, random_state=None, stats=None, verbose=True, migrate_prob=0, tq=True, store=False, personal_map=False, stop_condition=None, details=False, classification=False, score_object="y", sub_mu_max=1, limit_type="h_bgp", batch_para=False): """ Parameters ---------- pset:SymbolSet the feature x and target y and others should have been added. pop: int number of population. gen: int number of generation. mutate_prob:float probability of mutate. mate_prob:float probability of mate(crossover). initial_max:int max initial size of expression when first producing. initial_min : None,int min initial size of expression when first producing. max_value:int max size of expression. limit_type: "height" or "length",","h_bgp" limitation type for max_value, but don't affect initial_max, initial_min. hall:int,>=1 number of HallOfFame (elite) to maintain. re_hall:None or int>=2 Notes: only valid when hall number of HallOfFame to add to next generation. re_Tree: int number of new features to add to next generation. 0 is false to add. personal_map:bool or "auto" "auto" is using 'premap' and with auto refresh the 'premap' with individual.\n True is just using constant 'premap'.\n False is just use the prob of terminals. scoring: list of Callable, default is [sklearn.metrics.r2_score,] See Also ``sklearn.metrics`` score_pen: tuple of 1, -1 or float but 0. >0 : max problem, best is positive, worse -np.inf. <0 : min problem, best is negative, worse np.inf. Notes: if multiply score method, the scores must be turn to same dimension in prepossessing or weight by score_pen. Because the all the selection are stand on the mean(w_i*score_i) Examples:: scoring = [r2_score,] score_pen= [1,] cv:sklearn.model_selection._split._BaseKFold,int the shuffler must be False, default=1 means no cv. filter_warning:bool filter warning or not. add_coef:bool add coef in expression or not. inter_add:bool add intercept constant or not. inner_add:bool add inner coefficients or not. out_add:bool add out coefficients or not. flat_add:bool add flat coefficients or not. n_jobs:int default 1, advise 6. batch_size:int default 40, depend of machine. random_state:int None,int. cal_dim:bool escape the dim calculation. dim_type:Dim or None or list of Dim "coef": af(x)+b. a,b have dimension,f(x)'s dimension is not dnan. \n "integer": af(x)+b. f(x) is with integer dimension. \n [Dim1,Dim2]: f(x)'s dimension in list. \n Dim: f(x) ~= Dim. (see fuzzy) \n Dim: f(x) == Dim. \n None: f(x) == pset.y_dim fuzzy:bool choose the dim with same base with dim_type, such as m,m^2,m^3. stats:dict details of logbook to show. \n Map:\n values = {"max": np.max, "mean": np.mean, "min": np.mean, "std": np.std, "sum": np.sum} keys = {\n "fitness": just see fitness[0], \n "fitness_dim_max": max problem, see fitness with demand dim,\n "fitness_dim_min": min problem, see fitness with demand dim,\n "dim_is_target": demand dim,\n "coef": dim is True, coef have dim, \n "integer": dim is integer, \n ... } if stats is None, default is: for cal_dim=True: stats = {"fitness_dim_max": ("max",), "dim_is_target": ("sum",)} for cal_dim=False stats = {"fitness": ("max",)} if self-definition, the key is func to get attribute of each ind. Examples:: def func(ind): return ind.fitness[0] stats = {func: ("mean",), "dim_is_target": ("sum",)} verbose:bool print verbose logbook or not. tq:bool print progress bar or not. store:bool or path bool or path. stop_condition:callable stop condition on the best ind of hall, which return bool,the true means stop loop. Examples:: def func(ind): c = ind.fitness.values[0]>=0.90 return c details:bool return expr and predict_y or not. classification: bool classification or not. score_object: score by y or delta y (for implicit function). """ super(BaseLoop, self).__init__() assert initial_max <= max_value, "the initial size of expression should less than max_value limitation" if cal_dim: assert all( [isinstance(i, Dim) for i in pset.dim_ter_con.values()]), \ "all import dim of pset should be Dim object." self.details = details self.max_value = max_value self.pop = pop self.gen = gen self.mutate_prob = mutate_prob self.mate_prob = mate_prob self.migrate_prob = migrate_prob self.verbose = verbose self.cal_dim = cal_dim self.re_hall = re_hall self.re_Tree = re_Tree self.store = store self.limit_type = limit_type self.data_all = [] self.personal_map = personal_map self.stop_condition = stop_condition self.population = [] self.rand_state = None self.random_state = random_state self.sub_mu_max = sub_mu_max self.population_next = [] self.cpset = CalculatePrecisionSet(pset, scoring=scoring, score_pen=score_pen, filter_warning=filter_warning, cal_dim=cal_dim, add_coef=add_coef, inter_add=inter_add, inner_add=inner_add, vector_add=vector_add, out_add=out_add, flat_add=flat_add, cv=cv, n_jobs=n_jobs, batch_size=batch_size, tq=tq, fuzzy=fuzzy, dim_type=dim_type, details=details, classification=classification, score_object=score_object, batch_para=batch_para ) Fitness_ = newclass.create("Fitness_", Fitness, weights=score_pen) self.PTree = newclass.create("PTrees", SymbolTree, fitness=Fitness_) # def produce if initial_min is None: initial_min = 2 self.register("genGrow", genGrow, pset=self.cpset, min_=initial_min, max_=initial_max + 1, personal_map=self.personal_map) self.register("genFull", genFull, pset=self.cpset, min_=initial_min, max_=initial_max + 1, personal_map=self.personal_map) self.register("genHalf", genGrow, pset=self.cpset, min_=initial_min, max_=initial_max + 1, personal_map=self.personal_map) self.register("gen_mu", genGrow, min_=1, max_=self.sub_mu_max + 1, personal_map=self.personal_map) # def selection self.register("select", selTournament, tournsize=2) self.register("selKbestDim", selKbestDim, dim_type=self.cpset.dim_type, fuzzy=self.cpset.fuzzy) self.register("selBest", selBest) self.register("mate", cxOnePoint) # def mutate self.register("mutate", mutUniform, expr=self.gen_mu, pset=self.cpset) self.decorate("mate", staticLimit(key=operator.attrgetter(limit_type), max_value=self.max_value)) self.decorate("mutate", staticLimit(key=operator.attrgetter(limit_type), max_value=self.max_value)) if stats is None: if cal_dim: if score_pen[0] > 0: stats = {"fitness_dim_max": ("max",), "dim_is_target": ("sum",)} else: stats = {"fitness_dim_min": ("min",), "dim_is_target": ("sum",)} else: if score_pen[0] > 0: stats = {"fitness": ("max",)} else: stats = {"fitness": ("min",)} self.stats = Statis_func(stats=stats) logbook = Logbook() logbook.header = ['gen'] + (self.stats.fields if self.stats else []) self.logbook = logbook if hall is None: hall = 1 self.hall = HallOfFame(hall) if re_hall is None: self.re_hall = None else: if re_hall == 1 or re_hall == 0: print("re_hall should more than 1") re_hall = 2 assert re_hall >= hall, "re_hall should more than hall" self.re_hall = HallOfFame(re_hall) def varAnd(self, *arg, **kwargs): return varAnd(*arg, **kwargs) def to_csv(self, data_all): """store to csv""" if self.store: if isinstance(self.store, str): path = self.store else: path = os.getcwd() file_new_name = "_".join((str(self.pop), str(self.gen), str(self.mutate_prob), str(self.mate_prob), str(time.time()))) try: st = Store(path) st.to_csv(data_all, file_new_name, transposition=True) print("store data to ", path, file_new_name) except (IOError, PermissionError): st = Store(os.getcwd()) st.to_csv(data_all, file_new_name, transposition=True) print("store data to ", os.getcwd(), file_new_name) def maintain_halls(self, population): """maintain the best p expression""" if self.re_hall is not None: maxsize = max(self.hall.maxsize, self.re_hall.maxsize) if self.cal_dim: inds_dim = self.selKbestDim(population, maxsize) else: inds_dim = self.selBest(population, maxsize) self.hall.update(inds_dim) self.re_hall.update(inds_dim) sole_inds = [i for i in self.re_hall.items if i not in inds_dim] inds_dim.extend(sole_inds) else: if self.cal_dim: inds_dim = self.selKbestDim(population, self.hall.maxsize) else: inds_dim = self.selBest(population, self.hall.maxsize) self.hall.update(inds_dim) inds_dim = [] inds_dim = copy.deepcopy(inds_dim) return inds_dim def re_add(self): """add the expression as a feature""" if self.hall.items and self.re_Tree: it = self.hall.items indo = it[random.choice(len(it))] ind = copy.deepcopy(indo) inds = ind.depart() if not inds: pass else: inds = [self.cpset.calculate_detail(indi) for indi in inds] le = min(self.re_Tree, len(inds)) indi = inds[random.choice(le)] self.cpset.add_tree_to_features(indi) def re_fresh_by_name(self, *arr): re_name = ["mutate", "genGrow", "genFull", "genHalf"] if len(arr) > 0: re_name.extend(arr) self.refresh(re_name, pset=self.cpset) # for i in re_name + ["mate"]: # don‘t del this # self.decorate(i, staticLimit(key=operator.attrgetter("height"), max_value=2 * (self.max_value + 1))) def top_n(self, n=10, gen=-1, key="value", filter_dim=True, ascending=False): """ Return the best n results. Note: Only valid in ``store=True``. Parameters ---------- n:int n. gen: the generation, default is -1. key: str sort keys, default is "values". filter_dim: filter no-dim expressions or not. ascending: reverse. Returns ------- top n results. pd.DataFrame """ import pandas as pd if self.store == "False": raise TypeError("Only valid with store=True") data = self.data_all data = pd.DataFrame(data) if gen == -1: gen = max(data["gen"]) data = data[data["gen"] == gen] if filter_dim: data = data[data["dim_score"] == 1] data = data.drop_duplicates(['expr'], keep="first") if key is not None: data[key] = data[key].str.replace("(", "") data[key] = data[key].str.replace(")", "") data[key] = data[key].str.replace(",", "") try: data[key] = data[key].astype(float) except ValueError: raise TypeError("check this key column can be translated into float") data = data.sort_values(by='value', ascending=ascending).iloc[:n, :] return data def check_height_length(self, pop, site=""): old = len(pop) if self.limit_type == 'height': pop = [i for i in pop if i.height <= self.max_value] elif self.limit_type == 'length': pop = [i for i in pop if i.length <= self.max_value] else: pop = [i for i in pop if i.h_bgp <= self.max_value] new = len(pop) if old == new: pass else: if site != "": print(site) # raise TypeError index = random.randint(0, new, old - new) pop.extend([pop[i] for i in index]) return pop def run(self, warm_start=False, new_gen=None): """ Parameters ---------- warm_start:bool warm_start from last result. new_gen: new generations for warm_startm, default is the initial generations. """ # 1.generate################################################################### if warm_start is False: random.seed(self.random_state) population = [self.PTree(self.genHalf()) for _ in range(self.pop)] gen_i = 0 gen = self.gen else: assert self.population_next != [] random.set_state(self.rand_state) population = self.population_next gen_i = self.gen_i self.re_fresh_by_name() if new_gen: gen = gen_i + new_gen else: gen = gen_i + self.gen for gen_i in range(gen_i + 1, gen + 1): population_old = copy.deepcopy(population) # 2.evaluate############################################################### invalid_ind_score = self.cpset.parallelize_score(population_old) for ind, score in zip(population_old, invalid_ind_score): ind.fitness.values = tuple(score[0]) ind.y_dim = score[1] ind.dim_score = score[2] ind.coef_expr = score[3] ind.coef_pre_y = score[4] population = population_old # 3.log################################################################### # 3.1.log-print############################## record = self.stats.compile(population) if self.stats else {} self.logbook.record(gen=gen_i, **record) if self.verbose: print(self.logbook.stream) # 3.2.log-store############################## if self.store: datas = [{"gen": gen_i, "name": str(pop_i), "expr": str([pop_i.coef_expr]), "value": str(pop_i.fitness.values), "dimension": str(pop_i.y_dim), "dim_score": pop_i.dim_score} for pop_i in population] self.data_all.extend(datas) self.population = copy.deepcopy(population) # 3.3.log-hall############################### inds_dim = self.maintain_halls(population) # 4.refresh################################################################ # 4.1.re_update the premap ################## if self.personal_map == "auto": [self.cpset.premap.update(indi, self.cpset) for indi in inds_dim] # 4.2.re_add_tree and refresh pset########### if self.re_Tree: self.re_add() self.re_fresh_by_name() # 6.next generation !!!!####################################################### # selection and mutate,mate,migration population = self.select(population, int((1 - self.migrate_prob) * len(population)) - len(inds_dim)) offspring = self.varAnd(population, self, self.mate_prob, self.mutate_prob) offspring.extend(inds_dim) migrate_pop = [self.PTree(self.genFull()) for _ in range(int(self.migrate_prob * len(population)))] population[:] = offspring + migrate_pop population = self.check_height_length(population) # 5.break####################################################### if self.stop_condition is not None: if self.stop_condition(self.hall.items[0]): break # 7 freeze ################################################### self.rand_state = random.get_state() self.population_next = population self.gen_i = gen_i # final.store##################################################################### if self.store: self.to_csv(self.data_all) self.hall.items = [self.cpset.calculate_detail(indi) for indi in self.hall.items] return self.hall
sl = [SymbolTree.genGrow(pset0, 3, 4) for _ in range(500)] a = time.time() # sli =" MAdd(Sub(Add(Mul(x0, gx1), exp(x10)), Mul(Conv(Add(x0, gx0)), Mul(x6, MAdd(x10)))))" # sl =["MAdd(gx1 * x11 * (-x0 + x11) * MAdd(gx1))"] # sl = [compile_context(sli, pset0.context, pset0.gro_ter_con) for sli in sl] # sl = [simple(sli.args[0], pset0.gro_ter_con) for sli in sl if len(sli.args)>0] c = 1 a = time.time() pset0 = CalculatePrecisionSet(pset0, scoring=None, score_pen=(1, ), filter_warning=True, cal_dim=False, dim_type=None, fuzzy=False, add_coef=False, inter_add=True, inner_add=False, n_jobs=1, batch_size=20, tq=True) from sys import getsizeof T100 = getsizeof(sl) psize = getsizeof(pset0) # print(T100,psize) for i in sl: b = time.time() i0 = compile_context(i, pset0.context, pset0.gro_ter_con) r2 = time.time()
x[:, 2] = x[:, 0] / x[:, 1] y = np.zeros(x.shape[0]) x = x y = y pset = SymbolSet() pset.add_features(x, y) pset.add_operations( categories=("Add", "Mul", "Self", "Abs"), self_categories=None) from sklearn.metrics import r2_score, mean_squared_error cp = CalculatePrecisionSet(pset, scoring=[r2_score, mean_squared_error], score_pen=[1, -1], filter_warning=True) x0, x1, x2 = sympy.symbols("x0, x1, x2") # t=Function("t") # expr00 = (x2*x1+x0*x2*2).subs(x0, t(x1)) # dv1 = sympy.diff(expr00, x1, evaluate=True) # dv1 = dv1.subs(t(x1), x0) # # t = Function("t") # expr00 = (x2*x1+x0*x2*2).subs(x1, t(x0)) # dv2 = sympy.diff(expr00, x0, evaluate=True) # dv2 = dv2.subs(t(x0), x1) # # k = dv1/dv2 #
def test_pset_passed_to_cpset_will_change(self): cp = CalculatePrecisionSet(self.pset) self.assertNotEqual(cp, self.cp)
class MyTestbase(unittest.TestCase): def setUp(self): self.SymbolTree = SymbolTree self.pset = SymbolSet() from sklearn.datasets import fetch_california_housing data = fetch_california_housing() x = data["data"][:100] y = data["target"][:100] # No = Normalizer() # y=y/max(y) # x = No.fit_transform(x) self.x = x self.y = y # self.pset.add_features(x, y, ) self.pset.add_features(x, y, x_group=[[1, 2], [4, 5]]) self.pset.add_constants([6, 3, 4], c_dim=[dless, dless, dless], c_prob=None) self.pset.add_operations(power_categories=(2, 3, 0.5), categories=("Add", "Mul", "Self", "Abs"), self_categories=None) from sklearn.metrics import r2_score, mean_squared_error self.cp = CalculatePrecisionSet(self.pset, scoring=[r2_score, mean_squared_error], score_pen=[1, -1], filter_warning=True) def test_pset_passed_to_cpset_will_change(self): cp = CalculatePrecisionSet(self.pset) self.assertNotEqual(cp, self.cp) def test_tree_gengrow_repr_and_str_different(self): from numpy import random random.seed(1) sl = SymbolTree.genGrow(self.pset, 3, 4) print(sl) # self.assertNotEqual(repr(sl), str(sl)) def test_add_tree_back(self): from numpy import random random.seed(1) sl = SymbolTree.genGrow(self.pset, 3, 4) self.pset.add_tree_to_features(sl) # def test_barch_tree(self): from numpy import random random.seed(1) for i in range(10): sl = SymbolTree.genGrow(self.pset, 3, 4) cpsl = self.cp.calculate_detail(sl) self.assertIsNotNone(cpsl.y_dim) self.assertIsNotNone(cpsl.expr) self.assertIsNone(cpsl.p_name) if cpsl.pre_y is not None: self.assertIsInstance(cpsl.pre_y, numpy.ndarray) self.assertEqual(cpsl.pre_y.shape, self.y.shape) print(cpsl.coef_pre_y[:3]) print(cpsl.pre_y[:3]) print(cpsl.coef_score) print(cpsl.coef_expr) print(cpsl.pure_expr) def test_depart_tree(self): from numpy import random random.seed(1) for i in range(10): sl = SymbolTree.genGrow(self.pset, 5, 6) sl_departs = sl.depart() for i in sl_departs: cpsl = self.cp.calculate_simple(i) self.assertIsNotNone(cpsl.y_dim) self.assertIsNotNone(cpsl.expr) self.assertIsNone(cpsl.p_name)