예제 #1
0
    def setUp(self):
        """Build the fixtures shared by the tests: a symbol set and a scorer.

        Stores on ``self``:

        * ``SymbolTree`` -- the tree class under test.
        * ``x`` / ``y`` -- feature matrix and target of a bundled regression
          dataset.
        * ``pset`` -- a ``SymbolSet`` filled with the features (columns
          ``[1, 2]`` and ``[4, 5]`` grouped), three dimensionless constants
          and the Add/Mul/Self/Abs operations plus powers 2, 3 and 0.5.
        * ``cp`` -- a ``CalculatePrecisionSet`` built from ``pset`` that scores
          with ``r2_score`` and ``mean_squared_error``.
        """
        self.SymbolTree = SymbolTree
        self.pset = SymbolSet()

        # ``load_boston`` was removed in scikit-learn 1.2 and importing it
        # there raises ImportError.  Fall back to another bundled regression
        # dataset so the fixture keeps working; older installations still get
        # exactly the data they used before.
        try:
            from sklearn.datasets import load_boston as load_regression_data
        except ImportError:
            from sklearn.datasets import load_diabetes as load_regression_data

        data = load_regression_data()
        x = data["data"]
        y = data["target"]
        self.x = x
        self.y = y

        self.pset.add_features(x, y, x_group=[[1, 2], [4, 5]])
        self.pset.add_constants([6, 3, 4],
                                c_dim=[dless, dless, dless],
                                c_prob=None)
        self.pset.add_operations(power_categories=(2, 3, 0.5),
                                 categories=("Add", "Mul", "Self", "Abs"),
                                 self_categories=None)

        from sklearn.metrics import r2_score, mean_squared_error
        self.cp = CalculatePrecisionSet(self.pset,
                                        scoring=[r2_score, mean_squared_error],
                                        score_pen=[1, -1],
                                        filter_warning=True)
예제 #2
0
    def fit(self, X=None, y=None, c=None, x_group=None, pset=None):
        """
        Run the configured loop and keep the best individual it finds.

        If ``pset`` is None, a simple SymbolSet is generated without dimension
        calculation (every dim is passed as 1), using only ``x_group``.
        For anything more specific, build a SymbolSet yourself and pass it
        as ``pset``.

        Examples
        --------
            pset = SymbolSet()
            pset.add_features_and_constants(...)
            pset.add_operations(...)
            ...
            ...SymbolLearning().fit(pset=pset)

        Parameters
        ----------
        X:np.ndarray
            Feature data. Only used when ``pset`` is None.
        y:np.ndarray
            Target data. Only used when ``pset`` is None.
        c:list of float
            Constants. Only used when ``pset`` is None.
        x_group:list of list
            Group of x. Only used when ``pset`` is None.
            See Also pset.add_features_and_constants
        pset:SymbolSet
            Ready-made symbol set; when given, X, y, c and x_group are ignored.
            See Also SymbolSet

        Notes
        -----
        After the call the following attributes are set: ``loop`` (the loop
        object that was run), ``best_one`` (first item of the returned hall),
        ``expr``, ``expr_type`` ("single" or "group"), ``y_dim`` and
        ``fitness`` (first fitness value of ``best_one``).
        """
        if pset is None:
            pset = SymbolSet()
            pset.add_features_and_constants(X,
                                            y,
                                            c,
                                            x_dim=1,
                                            y_dim=1,
                                            c_dim=1,
                                            x_prob=None,
                                            c_prob=None,
                                            x_group=x_group,
                                            feature_name=None)
            pset.add_operations(power_categories=(2, 3, 0.5),
                                categories=("Add", "Mul", "Sub", "Div"))

        # ``self.loop`` is overwritten below with the built loop object.  On a
        # repeated ``fit`` it therefore no longer holds the factory, and calling
        # it would fail; recover the factory from the stored instance instead.
        loop_factory = self.loop if callable(self.loop) else type(self.loop)
        self.loop = loop_factory(pset, *self.args, **self.kwargs)
        hall = self.loop.run()
        self.best_one = hall.items[0]
        try:
            expr = general_expr(self.best_one.coef_expr, self.loop.cpset)
            self.expr_type = "single"
        except (RecursionError, RuntimeWarning):
            # Expansion failed: keep the unexpanded coefficient expression.
            expr = self.best_one.coef_expr
            self.expr_type = "group"

        self.expr = expr
        self.y_dim = self.best_one.y_dim
        self.fitness = self.best_one.fitness.values[0]
예제 #3
0
class MyTestbase(unittest.TestCase):
    """Tests for ``SymbolSet``, ``SymbolTree`` and ``CalculatePrecisionSet``."""

    def setUp(self):
        """Build the fixtures shared by the tests: a symbol set and a scorer.

        Stores the tree class, the dataset (``x``, ``y``), a populated
        ``SymbolSet`` (``pset``) and a ``CalculatePrecisionSet`` (``cp``)
        scoring with ``r2_score`` and ``mean_squared_error``.
        """
        self.SymbolTree = SymbolTree
        self.pset = SymbolSet()

        # ``load_boston`` was removed in scikit-learn 1.2 and importing it
        # there raises ImportError.  Fall back to another bundled regression
        # dataset so the fixture keeps working; older installations still get
        # exactly the data they used before.
        try:
            from sklearn.datasets import load_boston as load_regression_data
        except ImportError:
            from sklearn.datasets import load_diabetes as load_regression_data

        data = load_regression_data()
        x = data["data"]
        y = data["target"]
        self.x = x
        self.y = y

        self.pset.add_features(x, y, x_group=[[1, 2], [4, 5]])
        self.pset.add_constants([6, 3, 4],
                                c_dim=[dless, dless, dless],
                                c_prob=None)
        self.pset.add_operations(power_categories=(2, 3, 0.5),
                                 categories=("Add", "Mul", "Self", "Abs"),
                                 self_categories=None)

        from sklearn.metrics import r2_score, mean_squared_error
        self.cp = CalculatePrecisionSet(self.pset,
                                        scoring=[r2_score, mean_squared_error],
                                        score_pen=[1, -1],
                                        filter_warning=True)

    def test_pset_passed_to_cpset_will_change(self):
        """A second precision set built from the same pset is a distinct object."""
        cp = CalculatePrecisionSet(self.pset)
        self.assertNotEqual(cp, self.cp)

    def test_tree_gengrow_repr_and_str_different(self):
        """Smoke test: a tree can be generated and printed without error."""
        from numpy import random
        random.seed(1)
        sl = SymbolTree.genGrow(self.pset, 3, 4)
        print(sl)

    def test_add_tree_back(self):
        """Smoke test: a generated tree can be added back to the pset as a feature."""
        from numpy import random
        random.seed(1)
        sl = SymbolTree.genGrow(self.pset, 3, 4)
        self.pset.add_tree_to_features(sl)

    def test_barch_tree(self):
        """Detailed calculation fills in y_dim/expr and, when present, a prediction
        shaped like the target."""
        from numpy import random
        random.seed(1)
        for _ in range(10):
            sl = SymbolTree.genGrow(self.pset, 3, 4)
            cpsl = self.cp.calculate_detail(sl)
            self.assertIsNotNone(cpsl.y_dim)
            self.assertIsNotNone(cpsl.expr)
            self.assertIsNone(cpsl.p_name)
            if cpsl.pre_y is not None:
                self.assertIsInstance(cpsl.pre_y, numpy.ndarray)
                self.assertEqual(cpsl.pre_y.shape, self.y.shape)
                print(cpsl.coef_pre_y[:3])
                print(cpsl.pre_y[:3])
                print(cpsl.coef_score)
                print(cpsl.coef_expr)
                print(cpsl.pure_expr)

    def test_depart_tree(self):
        """Every part returned by ``depart()`` can be scored with the simple calculation."""
        from numpy import random
        random.seed(1)
        for _ in range(10):
            sl = SymbolTree.genGrow(self.pset, 5, 6)
            # A distinct name for the inner loop: the original reused ``i``
            # and shadowed the outer loop index.
            for part in sl.depart():
                cpsl = self.cp.calculate_simple(part)
                self.assertIsNotNone(cpsl.y_dim)
                self.assertIsNotNone(cpsl.expr)
                self.assertIsNone(cpsl.p_name)
예제 #4
0
    # Three constant values; registered below through pset0.add_constants.
    c = [6, 3, 4]
    # Units: one ``kg`` entry per feature column, ``kg`` for the target and a
    # dimensionless unit for each constant.
    # NOTE(review): the list length 13 presumably matches x.shape[1] -- x is
    # defined outside this excerpt, confirm.
    from sympy.physics.units import kg

    x_u = [kg] * 13
    y_u = kg
    c_u = [dless, dless, dless]

    # Each conversion call is unpacked into a (data, dim) pair that replaces
    # the raw values and feeds the ``*_dim`` arguments below.
    x, x_dim = Dim.convert_x(x, x_u, target_units=None, unit_system="SI")
    y, y_dim = Dim.convert_xi(y, y_u)
    c, c_dim = Dim.convert_x(c, c_u)

    # Timestamp taken before the symbol set is built (its use is outside this excerpt).
    t = time.time()

    # Symbol set: features (with two column groups), constants and operations.
    pset0 = SymbolSet()
    pset0.add_features(x,
                       y,
                       x_dim=x_dim,
                       y_dim=y_dim,
                       x_group=[[1, 2], [3, 4, 5]])
    pset0.add_constants(c, c_dim=c_dim, c_prob=None)
    pset0.add_operations(power_categories=(2, 3, 0.5),
                         categories=("Add", "Mul", "Sub", "Div", "exp"),
                         self_categories=None)

    # Fixed seed, then 100 trees generated from pset0; ``z`` and ``a`` are
    # timestamps taken before and after the generation.
    random.seed(0)
    z = time.time()
    sl = [SymbolTree.genGrow(pset0, 3, 4) for _ in range(100)]
    a = time.time()
    # Replace every tree by the result of compile_context on it.
    sl = [compile_context(sli, pset0.context, pset0.gro_ter_con) for sli in sl]
예제 #5
0
    # Constants and their units.
    # NOTE(review): ``c`` holds 3 values but ``c_u`` lists 9 units -- confirm
    # that Dim.convert_x tolerates the length mismatch or trim ``c_u``.
    c = [1, 5.290 * 10**-11, 1.74]
    c_u = [
        elementary_charge, m, dless, dless, dless, dless, dless, dless, dless
    ]
    """preprocessing"""
    # Each conversion call is unpacked into a (data, dim) pair that replaces
    # the raw values and feeds the ``*_dim`` arguments below.
    x, x_dim = Dim.convert_x(x, x_u, target_units=None, unit_system="SI")
    y, y_dim = Dim.convert_xi(y, y_u)
    c, c_dim = Dim.convert_x(c, c_u)

    # The same transformer instance processes x/y and then the constants.
    scal = MagnitudeTransformer(tolerate=1)
    x, y = scal.fit_transform_all(x, y, group=2)
    c = scal.fit_transform_constant(c)

    # Symbol set.
    pset0 = SymbolSet()
    # Pair up consecutive column indices ([0, 1], [2, 3], ...) and drop the
    # first pair, so columns 0 and 1 are not part of any group.
    x_g = np.arange(x.shape[1])
    x_g = x_g.reshape(-1, 2)
    x_g = list(x_g[1:])
    pset0.add_features(x,
                       y,
                       x_dim=x_dim,
                       y_dim=y_dim,
                       x_group=x_g,
                       feature_name=fea_name)
    pset0.add_constants(c, c_dim=c_dim, c_prob=0.05)
    pset0.add_operations(power_categories=(2, 3, 0.5),
                         categories=("Add", "Mul", "Sub", "Div", "exp", "ln"),
                         self_categories=None)

    # Consumed by code outside this excerpt.
    height = 2
예제 #6
0
class MyTestgp(unittest.TestCase):
    """Smoke test for a hand-written select/mate/mutate loop over symbol trees."""

    def setUp(self):
        """Build the fixtures shared by the tests: a symbol set and a scorer.

        Stores the tree class, the dataset (``x``, ``y``), a populated
        ``SymbolSet`` (``pset``) and a ``CalculatePrecisionSet`` (``cp``)
        scoring with ``r2_score`` and ``mean_squared_error``.
        """
        self.SymbolTree = SymbolTree
        self.pset = SymbolSet()

        # ``load_boston`` was removed in scikit-learn 1.2 and importing it
        # there raises ImportError.  Fall back to another bundled regression
        # dataset so the fixture keeps working; older installations still get
        # exactly the data they used before.
        try:
            from sklearn.datasets import load_boston as load_regression_data
        except ImportError:
            from sklearn.datasets import load_diabetes as load_regression_data

        data = load_regression_data()
        x = data["data"]
        y = data["target"]

        self.x = x
        self.y = y
        self.pset.add_features(x, y, x_group=[[1, 2], [4, 5]])
        self.pset.add_constants([6, 3, 4], c_dim=[dless, dless, dless], c_prob=None)
        self.pset.add_operations(power_categories=(2, 3, 0.5),
                                 categories=("Add", "Mul", "Neg", "Abs"),
                                 self_categories=None)

        from sklearn.metrics import r2_score, mean_squared_error

        self.cp = CalculatePrecisionSet(self.pset,
                                        scoring=[r2_score, mean_squared_error],
                                        score_pen=[1, -1],
                                        dim_type=None,
                                        filter_warning=True)

    def test_gp_flow(self):
        """Run five score/select/vary rounds on ten trees; passes if nothing raises."""
        from numpy import random
        random.seed(1)
        cpset = self.cp

        # Tree type carrying a two-objective fitness with weights (1, -1).
        from deap.base import Fitness
        from featurebox.tools import newclass
        Fitness_ = newclass.create("Fitness_", Fitness, weights=(1, -1))
        PTree_ = newclass.create("PTrees_", SymbolTree, fitness=Fitness_)

        # Operators: selection, crossover, tree generation and mutation.
        toolbox = Toolbox()
        toolbox.register("select", selKbestDim, dim_type=dless)
        toolbox.register("mate", cxOnePoint)
        toolbox.register("generate", genGrow, pset=cpset, min_=2, max_=3)
        toolbox.register("mutate", mutShrink, pset=cpset)

        # Limit the "height" attribute of mate/mutate results to 10.
        toolbox.decorate("mate", staticLimit(key=operator.attrgetter("height"), max_value=10))
        toolbox.decorate("mutate", staticLimit(key=operator.attrgetter("height"), max_value=10))

        population = [PTree_.genGrow(cpset, 3, 4) for _ in range(10)]
        for _ in range(5):
            # Only individuals without a valid fitness are (re)scored.
            invalid_ind = [ind for ind in population if not ind.fitness.valid]
            invalid_ind_score = cpset.parallelize_score(inds=invalid_ind)

            # Each score entry supplies the fitness values and the y dimension.
            for ind, score in zip(invalid_ind, invalid_ind_score):
                ind.fitness.values = score[0]
                ind.y_dim = score[1]

            population = toolbox.select(population, len(population))
            offspring = varAnd(population, toolbox, 1, 1)
            population[:] = offspring
예제 #7
0
    # Three constant values; registered below through pset0.add_constants.
    c = [6, 3, 4]
    # Units: one ``kg`` entry per feature column, ``kg`` for the target and a
    # dimensionless unit for each constant.
    # NOTE(review): the list length 13 presumably matches x.shape[1] -- x is
    # defined outside this excerpt, confirm.
    from sympy.physics.units import kg

    x_u = [kg] * 13
    y_u = kg
    c_u = [dless, dless, dless]

    # Each conversion call is unpacked into a (data, dim) pair that replaces
    # the raw values and feeds the ``*_dim`` arguments below.
    x, x_dim = Dim.convert_x(x, x_u, target_units=None, unit_system="SI")
    y, y_dim = Dim.convert_xi(y, y_u)
    c, c_dim = Dim.convert_x(c, c_u)

    # Timestamp taken before the symbol set is built (its use is outside this excerpt).
    t = time.time()

    # Symbol set: features (three column groups, names "Ss0".."Ss12"),
    # constants and operations.
    pset0 = SymbolSet()
    pset0.add_features(x,
                       y,
                       x_dim=x_dim,
                       y_dim=y_dim,
                       x_group=[[1, 2], [3, 4, 5], [6, 7]],
                       feature_name=["Ss%i" % i for i in range(13)])
    pset0.add_constants(c, c_dim=c_dim, c_prob=None)
    pset0.add_operations(power_categories=(2, 3, 0.5),
                         categories=("Add", "Mul", "Sub", "Div", "ln"),
                         self_categories=None)

    # Fixed seed, then 500 trees generated from pset0; ``z`` and ``a`` are
    # timestamps taken before and after the generation.
    random.seed(2)
    z = time.time()
    sl = [SymbolTree.genGrow(pset0, 3, 4) for _ in range(500)]
    a = time.time()
예제 #8
0
import sympy

from featurebox.symbol.base import SymbolSet, SymbolTree
import numpy as np

from featurebox.symbol.calculation.translate import general_expr_dict, compile_context

if __name__ == "__main__":
    # Script settings (GVP): ``group`` is handed to add_features as x_group.
    height, group = 2, 2

    # Toy data: a constant 10x4 feature matrix and an all-ones target.
    y = np.ones(10)
    x = np.full(shape=(10, 4), fill_value=0.1)

    pset = SymbolSet()
    pset.add_features(x, y, x_group=group)

    # Every accumulative operation is given the same special probability.
    accumulative_ops = ("MAdd", "MMul", "MSub", "MDiv", "Conv", "Self")
    pset.add_accumulative_operation(
        categories=accumulative_ops,
        special_prob=dict.fromkeys(accumulative_ops, 0.16),
    )