Exemple #1
0
    # Register the feature data ``x`` and target ``y``; ``group`` is passed
    # through as the feature grouping.
    pset.add_features(
        x,
        y,
        x_group=group,
    )
    # Register the accumulative operations; every category gets the same
    # ``special_prob`` value (0.16) — presumably a selection weight, confirm
    # against SymbolSet.add_accumulative_operation.
    pset.add_accumulative_operation(categories=("MAdd", "MMul", "MSub", "MDiv",
                                                "Conv", "Self"),
                                    special_prob={
                                        "MAdd": 0.16,
                                        "MMul": 0.16,
                                        "MSub": 0.16,
                                        "MDiv": 0.16,
                                        "Conv": 0.16,
                                        "Self": 0.16
                                    })
    # Register the four basic arithmetic operations.
    pset.add_operations(categories=("Add", "Mul", "Sub", "Div"))

    # Flatten ``pset.free_symbol[1]`` into one list: an entry is either a
    # single sympy.Symbol (appended) or an iterable of symbols (extended).
    s = pset.free_symbol[1]
    ss = []
    for si in s:
        if isinstance(si, sympy.Symbol):
            ss.append(si)
        else:
            ss.extend(si)

    # Reference expression built from the first four flattened symbols.
    target = (ss[0] + ss[1]) * (ss[2] - ss[3])
    target = sympy.simplify(target)
    # a = time.time()
    # Seed the random generator before the population is created below.
    random.seed(4)
    population = [
        SymbolTree.genFull(pset, int(height - 1),
Exemple #2
0
    # Hold out 20% of the samples for testing; the split is fixed by random_state=0.
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.20,
                                                        random_state=0)

    store = Store()

    # Symbol set: training features/target plus the single constant 1.
    pset0 = SymbolSet()
    pset0.add_features(X_train, y_train)
    pset0.add_constants(c=[
        1,
    ])

    # Allowed operators: power 2 plus "Add", "exp" and "Neg".
    pset0.add_operations(
        power_categories=(2, ),
        categories=("Add", "exp", "Neg"),
    )
    h_bgp = 3

    # stop = None
    # The random_state used below was chosen on a Linux system. On other systems the
    # same random_state may give different results; try other values if needed.
    # Stop condition: true once the first fitness value of an individual reaches 0.99.
    stop = lambda ind: ind.fitness.values[0] >= 0.99
    sl = SymbolLearning(loop='MultiMutateLoop',
                        pset=pset0,
                        gen=10,
                        pop=3000,
                        hall=1,
                        batch_size=40,
                        re_hall=5,
                        n_jobs=12,
Exemple #3
0
    def fit(self,
            X=None,
            y=None,
            c=None,
            x_group=None,
            x_dim=1,
            y_dim=1,
            c_dim=1,
            x_prob=None,
            c_prob=None,
            pset=None,
            power_categories=(2, 3, 0.5),
            categories=("Add", "Mul", "Sub", "Div"),
            warm_start=False,
            new_gen=None):
        """
        Run the evolution loop and keep the best individual that it returns.

        Method 1. Fit with x, y; a simple SymbolSet is built internally.

        Examples::

            sl = SymbolLearning()
            sl.fit(x, y, ...)

        Method 2. Fit with a customized pset. When more control is needed,
        build a SymbolSet yourself and pass it as ``pset``.

        Examples::

            pset = SymbolSet()
            pset.add_features_and_constants(...)
            pset.add_operations(...)
            ...
            sl = SymbolLearning()
            sl.fit(pset=pset)

        A SymbolSet found in ``self.args`` or under ``self.kwargs["pset"]``
        is removed from there and used when the ``pset`` argument is None.

        Parameters
        ----------
        X: np.ndarray
            data.
        y: np.ndarray
            y.
        c: list of float, None
            constants.
        x_dim: 1 or list of Dim
            same size as x.shape[1]; the default 1 means dimensionless for all x.
        y_dim: 1, Dim
            dim of y.
        c_dim: 1, list of Dim
            same size as c; the default 1 means dimensionless for all c.
        x_prob: None, list of float
            same size as x.shape[1].
        c_prob: None, list of float
            same size as c.
        x_group: list of list
            Group of x.

            Examples:

                x_group=[[1,2],]
                or
                x_group=2

            See Also :py:func:`bgp.base.SymbolSet.add_features`
        power_categories: Sized, tuple, None
            Examples: (0.5, 2, 3)
        categories: tuple of str
            map table:
                    {"Add": sympy.Add, 'Sub': Sub, 'Mul': sympy.Mul, 'Div': Div}
                    {"sin": sympy.sin, 'cos': sympy.cos, 'exp': sympy.exp, 'ln': sympy.ln}
                    {'Abs': sympy.Abs, "Neg": functools.partial(sympy.Mul, -1.0),
                    "Rec": functools.partial(sympy.Pow, e=-1.0)}

                    Others:
                    "Rem":  f(x)=1-x, if x true
                    "Self":  f(x)=x, if x true
        pset: SymbolSet
            See Also SymbolSet.
        warm_start: bool
            warm start or not.

            Note:
                If you supply the pset yourself, check the number of features
                carefully, especially when ``re_Tree`` is used, because new
                features are added.
            Reference:
                CalculatePrecisionSet.update_with_X_y.
        new_gen: None, int
            warm_start generation.

        Raises
        ------
        ValueError
            If neither a pset nor both X and y are available and the loop
            has not been run before, or on a warm start without X, y or pset.
        AssertionError
            If ``warm_start`` is requested before a first fit.
        """

        # Collect any SymbolSet handed over at construction time (args/kwargs)
        # and strip it from there so it is not forwarded to the loop twice.
        found = [arg for arg in self.args if isinstance(arg, SymbolSet)]
        if found:
            self.args.remove(found[0])
        if "pset" in self.kwargs:
            found.append(self.kwargs.pop("pset"))

        # An explicit ``pset`` argument wins over one found in args/kwargs.
        if pset is None and found:
            pset = found[0]

        have_data = X is not None and y is not None

        if pset is None:
            if have_data:
                # Build a simple pset straight from the data.
                pset = SymbolSet()
                pset.add_features_and_constants(X,
                                                y,
                                                c,
                                                x_dim=x_dim,
                                                y_dim=y_dim,
                                                c_dim=c_dim,
                                                x_prob=x_prob,
                                                c_prob=c_prob,
                                                x_group=x_group,
                                                feature_name=None)
                pset.add_operations(power_categories=power_categories,
                                    categories=categories)
            elif not hasattr(self.loop, "gen"):
                # No data, no pset, and no previously run loop to fall back on.
                raise ValueError(
                    "The pset should be defined or the X and Y should be offered."
                )

        if warm_start:
            assert hasattr(
                self.loop,
                "gen"), "Before the warm_start, Need fit at least one time"

            if have_data:
                self.loop.cpset.update_with_X_y(X, y)
            elif pset:
                # NOTE: warm_start is not compatible with "re_Tree".
                self.loop.cpset.update(pset)
            else:
                raise ValueError(
                    "The pset should be defined or the X and Y should be offered."
                )

            self.loop.re_fresh_by_name()
            hall = self.loop.run(warm_start=True, new_gen=new_gen)
        else:
            # ``self.loop`` is either an already built loop (it has ``gen``),
            # whose class is re-instantiated, or the loop factory itself.
            if hasattr(self.loop, "gen"):
                loop_factory = self.loop.__class__
            else:
                loop_factory = self.loop
            self.loop = loop_factory(pset, *self.args, **self.kwargs)
            hall = self.loop.run()

        self.best_one = hall.items[0]
        try:
            best_expr = general_expr(self.best_one.coef_expr, self.loop.cpset)
            self.expr_type = "single"
        except (RecursionError, RuntimeWarning):
            # Fall back to the raw coefficient expression.
            best_expr = self.best_one.coef_expr
            self.expr_type = "group"

        self.expr = best_expr
        self.y_dim = self.best_one.y_dim
        self.fitness = self.best_one.fitness.values[0]
Exemple #4
0
    # Units: thirteen features and the target in kg, three dimensionless constants.
    x_u = [kg] * 13
    y_u = kg
    c_u = [dless, dless, dless]

    # Convert the values together with their units; each call returns the
    # converted data and the matching dimension object(s).
    x, x_dim = Dim.convert_x(x, x_u, target_units=None, unit_system="SI")
    y, y_dim = Dim.convert_xi(y, y_u)
    c, c_dim = Dim.convert_x(c, c_u)

    t = time.time()

    # Symbol set with two feature groups, the constants and the operator table.
    pset0 = SymbolSet()
    pset0.add_features(x,
                       y,
                       x_dim=x_dim,
                       y_dim=y_dim,
                       x_group=[[1, 2], [3, 4, 5]])
    pset0.add_constants(c, c_dim=c_dim, c_prob=None)
    pset0.add_operations(power_categories=(2, 3, 0.5),
                         categories=("Add", "Mul", "Sub", "Div", "exp"),
                         self_categories=None)

    random.seed(0)
    # Time two phases separately: generating 100 trees (z -> a) and
    # compiling them with compile_context (a -> b).
    z = time.time()
    sl = [SymbolTree.genGrow(pset0, 3, 4) for _ in range(100)]
    a = time.time()
    sl = [compile_context(sli, pset0.context, pset0.gro_ter_con) for sli in sl]
    b = time.time()

    # Prints the compile time first, then the generation time.
    print(b - a, a - z)
Exemple #5
0
    SL_data = data.SL_data
    si_transformer = data.si_transformer

    store = Store()

    x, x_dim, y, y_dim, c, c_dim, X, Y = SL_data
    # Pair consecutive feature indices into groups of two, then drop the
    # first pair.  The reshape must run while ``x_g`` is still an ndarray:
    # the previous order converted it to a list first, and ``list`` has no
    # ``reshape`` (AttributeError).
    # NOTE(review): assumes the intent is "pair up, then skip the first
    # pair", which needs an even number of feature columns — confirm.
    x_g = np.arange(x.shape[1]).reshape(-1, 2)
    x_g = list(x_g[1:])

    # Symbol set: grouped features, constants, and the operator table.
    pset0 = SymbolSet()
    pset0.add_features(x, y, x_dim=x_dim, y_dim=y_dim, x_group=x_g)
    pset0.add_constants(c, c_dim=c_dim, c_prob=0.05)
    pset0.add_operations(power_categories=(2, 3, 0.5, 1 / 3, 4, 1 / 4),
                         # categories=("Mul",),
                         categories=("Add", "Mul", "Sub", "Div", "exp", "ln"),
                         self_categories=None)

    total_height = 3
    h_bgp = 2
    # This random_state is under Linux system. For others system ,the random_state maybe different,please
    # try with different random_state.
    for i in range(1, 10):
        stop = lambda ind: ind.fitness.values[0] >= 0.95
        sl = SymbolLearning(loop="MultiMutateLoop", pset=pset0, gen=20, pop=1000, hall=1, batch_size=40, re_hall=3,
                            n_jobs=12, mate_prob=0.9, max_value=h_bgp, initial_min=2, initial_max=h_bgp,
                            mutate_prob=0.8, tq=False, dim_type="coef", stop_condition=stop,
                            re_Tree=0, store=False, random_state=4, verbose=True,
                            # stats=None,
                            stats={"fitness_dim_max": ["max"], "dim_is_target": ["sum"], "h_bgp": ["mean"]},
                            add_coef=True, inter_add=True, out_add=True, cal_dim=True, vector_add=True,
Exemple #6
0
    # Units: thirteen features and the target in kg, three dimensionless constants.
    x_u = [kg] * 13
    y_u = kg
    c_u = [dless, dless, dless]

    # Convert the values together with their units; each call returns the
    # converted data and the matching dimension object(s).
    x, x_dim = Dim.convert_x(x, x_u, target_units=None, unit_system="SI")
    y, y_dim = Dim.convert_xi(y, y_u)
    c, c_dim = Dim.convert_x(c, c_u)

    z = time.time()

    # Symbol set with three feature groups, the constants and the operator table.
    pset0 = SymbolSet()
    pset0.add_features(x, y, x_dim=x_dim, y_dim=y_dim, x_group=[[1, 2], [3, 4], [5, 6]])
    pset0.add_constants(c, c_dim=c_dim, c_prob=None)
    pset0.add_operations(power_categories=(2, 3, 0.5),
                         categories=("Add", "Mul", "Sub", "Div", "exp", "Abs"))

    # a = time.time()

    # Build the loop directly (single job, fixed random_state).
    bl = MultiMutateLoop(pset=pset0, gen=20, pop=2000, hall=2, batch_size=60, re_hall=2,
                         n_jobs=1, mate_prob=1, max_value=3, initial_max=1, initial_min=1,
                         mutate_prob=0.8, tq=True, dim_type="coef",
                         re_Tree=None, store=False, random_state=2,
                         stats={"fitness_dim_max": ["max"], "dim_is_target": ["sum"], "h_bgp": ["max"]},
                         add_coef=True, cal_dim=False, inner_add=False, vector_add=True, personal_map=False)
    # b = time.time()
    # First a normal run, then a second run requested as a warm start.
    bl.run()
    bl.run(warm_start=True)

    # population = [bl.PTree(bl.genFull()) for _ in range(30)]
    # pset = bl.cpset
Exemple #7
0
    # unittest.main()
    import numpy as np

    # Three hand-written feature columns (16 samples after the transpose).
    x = np.array([[10, 6, 3, 4, 5, 6, 7, 8, 9, 9, 10, 9, 7, 5, 3, 1],
                  [1, 2, 3, 4, 4, 3, 2, 4, 5, 6, 7, 8, 9, 10, 12, 15],
                  [2, 3, 4, 8, 12, 16, 30, 32, 33, 30, 20, 10, 5, 3, 2, 1]]).T
    # Overwrite the third column with the ratio of the first two.
    # NOTE(review): the array is built from integer literals only, so it has an
    # integer dtype and the float ratio is truncated when stored here — confirm
    # that this is intended.
    x[:, 2] = x[:, 0] / x[:, 1]
    # All-zero target of matching length.
    y = np.zeros(x.shape[0])
    # The two self-assignments below have no effect.
    x = x
    y = y

    pset = SymbolSet()

    pset.add_features(x, y)
    pset.add_operations(
        categories=("Add", "Mul", "Self", "Abs"),
        self_categories=None)

    from sklearn.metrics import r2_score, mean_squared_error

    # Two scoring functions paired with score_pen values 1 and -1.
    cp = CalculatePrecisionSet(pset, scoring=[r2_score, mean_squared_error],
                               score_pen=[1, -1],
                               filter_warning=True)
    x0, x1, x2 = sympy.symbols("x0, x1, x2")

    # t=Function("t")
    # expr00 = (x2*x1+x0*x2*2).subs(x0, t(x1))
    # dv1 = sympy.diff(expr00, x1, evaluate=True)
    # dv1 = dv1.subs(t(x1), x0)
    #
    # t = Function("t")
Exemple #8
0
    # Convert each unit expression to SI; index [1] of every result is used
    # below (presumably the Dim part of a (scale, Dim) pair — confirm).
    gpa_dim = Dim.convert_to_Dim(1e9 * pa, unit_system="SI")
    j_d_mol_dim = Dim.convert_to_Dim(1000 * J / mol, unit_system="SI")
    K_dim = Dim.convert_to_Dim(K, unit_system="SI")
    kg_d_m3_dim = Dim.convert_to_Dim(kg / m**3, unit_system="SI")

    # Ignore the scaling factor
    y_dim = dless
    x_dim = [
        dless, gpa_dim[1], j_d_mol_dim[1], K_dim[1], dless, kg_d_m3_dim[1]
    ]

    # Symbol set
    pset0 = SymbolSet()
    pset0.add_features(x, y, x_dim=x_dim, y_dim=y_dim)
    pset0.add_operations(
        power_categories=(2, 3, 0.5),
        categories=("Mul", "Div", "exp"),
    )

    # Symbolic regression

    # Option 1: coefficients are added at the outermost level
    # sl = SymbolLearning(loop="MultiMutateLoop", pop=100, gen=2, random_state=1,pset=pset0,
    #                     classification=True, scoring=[metrics.accuracy_score, ], score_pen=[1, ],
    #                     cal_dim=True, n_jobs = 10,
    #                     store =True
    #                     )

    # # Option 2: coefficients are added at the outermost level, and the
    # # coefficients are assumed to complete the dimension automatically
    # pset0.y_dim=None
    # sl = SymbolLearning(loop="MultiMutateLoop", pop=1000, gen=3, random_state=1,pset=pset0,
    #                     classification=True, scoring=[metrics.accuracy_score, ], score_pen=[1, ],
Exemple #9
0
    # Append one extra feature column: column 1 divided by column 0.
    X = np.concatenate((X, (X[:, 1] / X[:, 0]).reshape(-1, 1)), axis=1)

    # Hold out 20% of the samples for testing; the split is fixed by random_state=3.
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.20,
                                                        random_state=3)

    store = Store()

    # Symbol set: training features/target, no constants.
    pset0 = SymbolSet()
    pset0.add_features(X_train, y_train)
    # pset0.add_constants(c=[1, ])
    #pset0.add_operations(power_categories=(2,0.5),
    pset0.add_operations(
        # power_categories=(2,),
        categories=("exp", "Mul", "Sub"),
        self_categories=None)

    h_bgp = 3

    # stop = None
    # The random_state used below was chosen on a Linux system. On other systems the
    # same random_state may give different results; try other values if needed.
    # Stop condition: true once the first fitness value of an individual reaches 0.999.
    stop = lambda ind: ind.fitness.values[0] >= 0.999
    sl = SymbolLearning(loop='MultiMutateLoop',
                        pset=pset0,
                        gen=10,
                        pop=3000,
                        hall=1,
                        batch_size=40,
                        re_hall=5,
Exemple #10
0
class MyTestgp(unittest.TestCase):
    """Smoke test: drive a hand-wired select/mate/mutate loop over SymbolTree individuals."""

    def setUp(self):
        # Fixture: first 100 California-housing samples, a SymbolSet built on
        # them, and a CalculatePrecisionSet scoring with r2 and MSE.
        self.SymbolTree = SymbolTree
        symbols = self.pset = SymbolSet()

        from sklearn.datasets import fetch_california_housing

        housing = fetch_california_housing()
        self.x = housing["data"][:100]
        self.y = housing["target"][:100]

        symbols.add_features(self.x, self.y, x_group=[[1, 2], [4, 5]])
        symbols.add_constants(
            [6, 3, 4],
            c_dim=[dless, dless, dless],
            c_prob=None,
        )
        symbols.add_operations(
            power_categories=(2, 3, 0.5),
            categories=("Add", "Mul", "Neg", "Abs"),
            self_categories=None,
        )

        from sklearn.metrics import r2_score, mean_squared_error

        self.cp = CalculatePrecisionSet(
            symbols,
            scoring=[r2_score, mean_squared_error],
            score_pen=[1, -1],
            dim_type=None,
            filter_warning=True,
        )

    def test_gp_flow(self):
        # Five generations of: score the unevaluated individuals, select,
        # then vary with varAnd.  The test passes if nothing raises.
        from numpy import random as np_random
        np_random.seed(1)

        precision_set = self.cp

        # Individual type: a SymbolTree subclass carrying a two-objective fitness.
        from deap.base import Fitness

        fitness_cls = newclass.create("Fitness_", Fitness, weights=(1, -1))
        tree_cls = newclass.create("PTrees_", SymbolTree, fitness=fitness_cls)

        # Operators: selection, crossover, tree generation, mutation.
        toolbox = Toolbox()
        toolbox.register("select", selKbestDim, dim_type=dless)
        toolbox.register("mate", cxOnePoint)
        toolbox.register("generate", genGrow, pset=precision_set, min_=2, max_=3)
        toolbox.register("mutate", mutShrink, pset=precision_set)

        # Cap tree height at 10 for both variation operators.
        for alias in ("mate", "mutate"):
            toolbox.decorate(
                alias,
                staticLimit(key=operator.attrgetter("height"), max_value=10))

        population = [tree_cls.genGrow(precision_set, 3, 4) for _ in range(10)]

        for _ in range(5):
            pending = [tree for tree in population if not tree.fitness.valid]
            pending_scores = precision_set.parallelize_score(inds=pending)

            # Each score entry carries the fitness values first and the y_dim second.
            for tree, result in zip(pending, pending_scores):
                tree.fitness.values = result[0]
                tree.y_dim = result[1]

            population = toolbox.select(population, len(population))
            children = varAnd(population, toolbox, 1, 1)
            population[:] = children
Exemple #11
0
class MyTestbase(unittest.TestCase):
    """Basic checks for SymbolSet, SymbolTree generation and CalculatePrecisionSet."""

    def setUp(self):
        # Fixture: first 100 California-housing samples, a SymbolSet built on
        # them, and a CalculatePrecisionSet scoring with r2 and MSE.
        self.SymbolTree = SymbolTree
        symbols = self.pset = SymbolSet()

        from sklearn.datasets import fetch_california_housing

        housing = fetch_california_housing()
        self.x = housing["data"][:100]
        self.y = housing["target"][:100]

        symbols.add_features(self.x, self.y, x_group=[[1, 2], [4, 5]])
        symbols.add_constants([6, 3, 4], c_dim=[dless, dless, dless], c_prob=None)
        symbols.add_operations(
            power_categories=(2, 3, 0.5),
            categories=("Add", "Mul", "Self", "Abs"),
            self_categories=None,
        )

        from sklearn.metrics import r2_score, mean_squared_error

        self.cp = CalculatePrecisionSet(
            symbols,
            scoring=[r2_score, mean_squared_error],
            score_pen=[1, -1],
            filter_warning=True,
        )

    def test_pset_passed_to_cpset_will_change(self):
        # A second precision set built from the same pset must not compare
        # equal to the fixture one.
        self.assertNotEqual(CalculatePrecisionSet(self.pset), self.cp)

    def test_tree_gengrow_repr_and_str_different(self):
        # Only generates and prints a tree; the repr/str comparison is disabled.
        from numpy import random as np_random
        np_random.seed(1)
        print(SymbolTree.genGrow(self.pset, 3, 4))

    def test_add_tree_back(self):
        # A generated tree can be fed back into the symbol set as a feature.
        from numpy import random as np_random
        np_random.seed(1)
        tree = SymbolTree.genGrow(self.pset, 3, 4)
        self.pset.add_tree_to_features(tree)

    def test_barch_tree(self):
        # calculate_detail on ten generated trees: check the result fields
        # and, when a prediction exists, its type and shape.
        from numpy import random as np_random
        np_random.seed(1)
        for _ in range(10):
            tree = SymbolTree.genGrow(self.pset, 3, 4)
            detail = self.cp.calculate_detail(tree)

            self.assertIsNotNone(detail.y_dim)
            self.assertIsNotNone(detail.expr)
            self.assertIsNone(detail.p_name)

            if detail.pre_y is None:
                continue

            self.assertIsInstance(detail.pre_y, numpy.ndarray)
            self.assertEqual(detail.pre_y.shape, self.y.shape)
            print(detail.coef_pre_y[:3])
            print(detail.pre_y[:3])
            print(detail.coef_score)
            print(detail.coef_expr)
            print(detail.pure_expr)

    def test_depart_tree(self):
        # calculate_simple on every part returned by depart() for ten deeper trees.
        from numpy import random as np_random
        np_random.seed(1)
        for _ in range(10):
            tree = SymbolTree.genGrow(self.pset, 5, 6)
            for part in tree.depart():
                simple = self.cp.calculate_simple(part)
                self.assertIsNotNone(simple.y_dim)
                self.assertIsNotNone(simple.expr)
                self.assertIsNone(simple.p_name)