Example #1
def get_transformation_for_division(train_X_all, raw_features):
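    # Assemble the unary and binary transformation pools used for the
    # division-oriented feature space.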

    unary_transformations: List[UnaryTransformation] = []
    binary_transformations: List[Transformation] = []

    #unary_transformations.append(PandasDiscretizerTransformation(number_bins=10))
    unary_transformations.append(MinMaxScalingTransformation())
    unary_transformations.append(MDLPDiscretizerTransformation())

    unary_transformations.append(OneDivisionTransformation())
    unary_transformations.append(MinusTransformation())

    unary_transformations.append(ImputationTransformation('mean'))
    #unary_transformations.append(ImputationTransformation('median'))
    #unary_transformations.append(ImputationTransformation('most_frequent'))

    binary_transformations.extend(
        HigherOrderCommutativeClassGenerator(
            2,
            methods=[np.nansum, np.nanprod],
            sympy_methods=[sympy.Add, sympy.Mul]).produce())

    binary_transformations.extend(
        GroupByThenGenerator(
            2,
            methods=[np.nanmax, np.nanmin, np.nanmean, np.nanstd],
            sympy_methods=[
                groupbythenmax, groupbythenmin, groupbythenmean, groupbythenstd
            ]).produce())

    unary_transformations.extend(
        OneHotGenerator(train_X_all, raw_features).produce())

    return unary_transformations, binary_transformations
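A minimal usage sketch (hypothetical call site; it assumes the transformation classes used above are importable from the surrounding project and that train_X_all and raw_features are supplied by the caller):

unary_transformations, binary_transformations = get_transformation_for_division(
    train_X_all, raw_features)

# Report how many candidate transformations fall into each category.
print("unary transformations: " + str(len(unary_transformations)))
print("binary transformations: " + str(len(binary_transformations)))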
Example #2
def get_transformation_for_cat_feature_space(train_X_all, raw_features):
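    # Assemble the unary and binary transformation pools used for the
    # categorical feature space.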

    unary_transformations: List[UnaryTransformation] = []
    binary_transformations: List[Transformation] = []

    unary_transformations.append(
        PandasDiscretizerTransformation(number_bins=10))
    unary_transformations.append(MinMaxScalingTransformation())

    binary_transformations.extend(
        HigherOrderCommutativeClassGenerator(
            2,
            methods=[np.nansum, np.nanprod],
            sympy_methods=[sympy.Add, sympy.Mul]).produce())
    binary_transformations.extend(
        NumpyBinaryClassGenerator(methods=[np.divide, np.subtract],
                                  sympy_methods=[sympy_divide,
                                                 sympy_subtract]).produce())

    binary_transformations.extend(
        GroupByThenGenerator(
            2,
            methods=[np.nanmax, np.nanmin, np.nanmean, np.nanstd],
            sympy_methods=[
                groupbythenmax, groupbythenmin, groupbythenmean, groupbythenstd
            ]).produce())

    unary_transformations.extend(
        OneHotGenerator(train_X_all, raw_features).produce())

    return unary_transformations, binary_transformations
Example #3
    def produce_features(self):
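        # Layer unary and higher-order transformations over the raw features
        # to build the set of candidate features.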
        unary_transformations: List[UnaryTransformation] = []
        unary_transformations.append(
            PandasDiscretizerTransformation(number_bins=10))
        unary_transformations.append(MinMaxScalingTransformation())

        higher_order_transformations: List[Transformation] = []
        higher_order_transformations.extend(
            HigherOrderCommutativeClassGenerator(
                2, methods=[np.nansum, np.nanprod]).produce())
        higher_order_transformations.extend(
            NumpyBinaryClassGenerator(
                methods=[np.divide, np.subtract]).produce())

        # count is missing
        higher_order_transformations.extend(
            GroupByThenGenerator(
                2, methods=[np.nanmax, np.nanmin, np.nanmean,
                            np.nanstd]).produce())

        Fui = self.generate_features1(unary_transformations, self.raw_features)

        Fi_and_Fui = []
        Fi_and_Fui.extend(self.raw_features)
        Fi_and_Fui.extend(Fui)

        Foi = self.generate_features1(higher_order_transformations, Fi_and_Fui)

        Foui = self.generate_features1(unary_transformations, Foi)

        Fi_cand = []
        Fi_cand.extend(Fui)
        Fi_cand.extend(Foi)
        Fi_cand.extend(Foui)

        return Fi_cand
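A minimal sketch of how the candidate set returned above is layered (illustration only; generate_features1 is assumed to apply each transformation to every applicable combination of its input features):

# Hypothetical illustration of the layering in produce_features:
#   Fui  - unary transformations applied to the raw features
#   Foi  - higher-order transformations applied to the raw features plus Fui
#   Foui - unary transformations applied on top of Foi
# The returned candidate set is the union of the three layers:
Fi_cand = Fui + Foi + Foui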
Example #4
    def generate_candidates(self):
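        # Enumerate candidate features depth by depth, applying every
        # transformation in parallel at each level.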

        unary_transformations: List[UnaryTransformation] = []
        unary_transformations.append(
            PandasDiscretizerTransformation(number_bins=10))
        unary_transformations.append(MinMaxScalingTransformation())

        higher_order_transformations: List[Transformation] = []
        higher_order_transformations.extend(
            HigherOrderCommutativeClassGenerator(
                2, methods=[np.nansum, np.nanprod]).produce())
        higher_order_transformations.extend(
            NumpyBinaryClassGenerator(
                methods=[np.divide, np.subtract]).produce())

        # count is missing
        higher_order_transformations.extend(
            GroupByThenGenerator(
                2, methods=[np.nanmax, np.nanmin, np.nanmean,
                            np.nanstd]).produce())

        transformations = []
        transformations.extend(unary_transformations)
        transformations.extend(higher_order_transformations)
        #transformations.append(IdentityTransformation(2))

        print("unary transformations: " + str(len(unary_transformations)))
        print("higherorder transformations: " +
              str(len(higher_order_transformations)))

        features = self.Fi
        '''
        graph = nx.DiGraph()


        graph.add_node('root')
        for f in features:
            graph.add_node(str(f))
            graph.node[str(f)]['feature'] = f
            graph.add_edge('root', str(f))
        '''

        F0 = features

        F = []
        F.append(F0)
        '''
        for depth in range(2):
            F_t_plus_1 = []
            for t_i in transformations:
                for f_i in t_i.get_combinations(list(itertools.chain(*F[0:depth+1]))):
                    if t_i.is_applicable(f_i):
                        current_feature = CandidateFeature(copy.deepcopy(t_i), f_i)
                        print(current_feature)

                        
                        graph.add_node(str(current_feature))
                        graph.node[str(current_feature)]['feature'] = current_feature
                        for parent_feature in f_i:
                            graph.add_edge(str(parent_feature), str(current_feature))
                        
                        F_t_plus_1.append(current_feature)
            F.append(F_t_plus_1)

            print(len(list(itertools.chain(*F))))

        #self.plot_graph(graph)
        '''

        for depth in range(3):
            results = self.generate_in_parallel(transformations,
                                                F[0:depth + 1])
            F.append(results)

            print(len(list(itertools.chain(*F))))
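A minimal follow-up sketch (hypothetical continuation; it assumes F is left as built by the loop above and that itertools is imported in the surrounding module, as the progress prints already rely on it):

# Flatten the per-depth layers F[0], F[1], ... into one list of candidate features.
all_candidates = list(itertools.chain(*F))
print("total candidates: " + str(len(all_candidates)))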