def get_transformation_for_division(train_X_all, raw_features):
    """Assemble the transformation search space for the division scenario.

    Parameters
    ----------
    train_X_all : training data matrix, forwarded to ``OneHotGenerator``.
    raw_features : the raw candidate features, forwarded to ``OneHotGenerator``.

    Returns
    -------
    tuple(list, list)
        ``(unary_transformations, binary_transformations)`` where the unary
        list also contains the generated one-hot transformations.
    """
    # Disabled alternatives kept for reference: PandasDiscretizerTransformation
    # (number_bins=10) and ImputationTransformation with 'median' /
    # 'most_frequent' strategies.
    unary_ops: List[UnaryTransformation] = [
        MinMaxScalingTransformation(),
        MDLPDiscretizerTransformation(),
        OneDivisionTransformation(),
        MinusTransformation(),
        ImputationTransformation('mean'),
    ]

    binary_ops: List[Transformation] = []
    # Commutative arity-2 combinations (sum, product) with matching sympy forms.
    binary_ops.extend(
        HigherOrderCommutativeClassGenerator(
            2,
            methods=[np.nansum, np.nanprod],
            sympy_methods=[sympy.Add, sympy.Mul]).produce())
    # Group-by-then-aggregate combinations (max, min, mean, std).
    binary_ops.extend(
        GroupByThenGenerator(
            2,
            methods=[np.nanmax, np.nanmin, np.nanmean, np.nanstd],
            sympy_methods=[groupbythenmax, groupbythenmin,
                           groupbythenmean, groupbythenstd]).produce())

    # One-hot encodings are unary and are appended after the fixed set.
    unary_ops.extend(OneHotGenerator(train_X_all, raw_features).produce())

    return unary_ops, binary_ops
def get_transformation_for_cat_feature_space(train_X_all, raw_features):
    """Assemble the transformation search space for the categorical
    feature-space scenario.

    Parameters
    ----------
    train_X_all : training data matrix, forwarded to ``OneHotGenerator``.
    raw_features : the raw candidate features, forwarded to ``OneHotGenerator``.

    Returns
    -------
    tuple(list, list)
        ``(unary_transformations, binary_transformations)`` where the unary
        list also contains the generated one-hot transformations.
    """
    unary_ops: List[UnaryTransformation] = [
        PandasDiscretizerTransformation(number_bins=10),
        MinMaxScalingTransformation(),
    ]

    binary_ops: List[Transformation] = []
    # Commutative arity-2 combinations (sum, product) with matching sympy forms.
    binary_ops.extend(
        HigherOrderCommutativeClassGenerator(
            2,
            methods=[np.nansum, np.nanprod],
            sympy_methods=[sympy.Add, sympy.Mul]).produce())
    # Non-commutative pairwise operations (division, subtraction).
    binary_ops.extend(
        NumpyBinaryClassGenerator(
            methods=[np.divide, np.subtract],
            sympy_methods=[sympy_divide, sympy_subtract]).produce())
    # Group-by-then-aggregate combinations (max, min, mean, std).
    binary_ops.extend(
        GroupByThenGenerator(
            2,
            methods=[np.nanmax, np.nanmin, np.nanmean, np.nanstd],
            sympy_methods=[groupbythenmax, groupbythenmin,
                           groupbythenmean, groupbythenstd]).produce())

    # One-hot encodings are unary and are appended after the fixed set.
    unary_ops.extend(OneHotGenerator(train_X_all, raw_features).produce())

    return unary_ops, binary_ops
def produce_features(self):
    """Generate the candidate-feature pool from ``self.raw_features``.

    Applies one round of unary transformations to the raw features, one
    round of higher-order (binary/group-by) transformations to raw+unary,
    then unary transformations again on top of the higher-order results.

    Returns
    -------
    list
        All generated candidates (unary, higher-order, and unary-on-
        higher-order), excluding the raw features themselves.
    """
    unary_ops: List[UnaryTransformation] = [
        PandasDiscretizerTransformation(number_bins=10),
        MinMaxScalingTransformation(),
    ]

    higher_order_ops: List[Transformation] = []
    higher_order_ops.extend(
        HigherOrderCommutativeClassGenerator(
            2, methods=[np.nansum, np.nanprod]).produce())
    higher_order_ops.extend(
        NumpyBinaryClassGenerator(
            methods=[np.divide, np.subtract]).produce())
    # NOTE: a "count" aggregate is missing from the group-by set.
    higher_order_ops.extend(
        GroupByThenGenerator(
            2, methods=[np.nanmax, np.nanmin, np.nanmean,
                        np.nanstd]).produce())

    # Round 1: unary transformations over the raw features.
    unary_candidates = self.generate_features1(unary_ops, self.raw_features)

    # Round 2: higher-order transformations over raw + unary candidates.
    base_plus_unary = [*self.raw_features, *unary_candidates]
    higher_order_candidates = self.generate_features1(
        higher_order_ops, base_plus_unary)

    # Round 3: unary transformations over the higher-order candidates.
    unary_on_higher = self.generate_features1(
        unary_ops, higher_order_candidates)

    return [*unary_candidates, *higher_order_candidates, *unary_on_higher]
def generate_candidates(self):
    """Iteratively expand the candidate-feature sets ``F`` from ``self.Fi``.

    Builds the combined unary + higher-order transformation list, then runs
    three rounds of ``self.generate_in_parallel`` where round ``depth``
    consumes all features produced so far (``F[0:depth + 1]``) and appends
    the new results to ``F``.  Progress is reported via ``print``; the
    method returns ``None`` — results live only in the local ``F`` unless
    ``generate_in_parallel`` has side effects (can't tell from here).
    """
    unary_transformations: List[UnaryTransformation] = []
    unary_transformations.append(
        PandasDiscretizerTransformation(number_bins=10))
    unary_transformations.append(MinMaxScalingTransformation())

    higher_order_transformations: List[Transformation] = []
    higher_order_transformations.extend(
        HigherOrderCommutativeClassGenerator(
            2, methods=[np.nansum, np.nanprod]).produce())
    higher_order_transformations.extend(
        NumpyBinaryClassGenerator(
            methods=[np.divide, np.subtract]).produce())
    #count is missing
    higher_order_transformations.extend(
        GroupByThenGenerator(
            2, methods=[np.nanmax, np.nanmin, np.nanmean,
                        np.nanstd]).produce())

    # Search uses the union of both transformation families.
    transformations = []
    transformations.extend(unary_transformations)
    transformations.extend(higher_order_transformations)
    #transformations.append(IdentityTransformation(2))

    print("unary transformations: " + str(len(unary_transformations)))
    print("higherorder transformations: " +
          str(len(higher_order_transformations)))

    # Starting feature set supplied by the instance — presumably the raw
    # features; verify against the class constructor.
    features = self.Fi

    # Dead code (string literal, never executed): earlier graph-based
    # bookkeeping of the feature-derivation DAG via networkx.
    '''
    graph = nx.DiGraph()
    graph.add_node('root')
    for f in features:
        graph.add_node(str(f))
        graph.node[str(f)]['feature'] = f
        graph.add_edge('root', str(f))
    '''

    # F[d] holds the features produced at depth d; F[0] is the seed set.
    F0 = features
    F = []
    F.append(F0)

    # Dead code (string literal, never executed): earlier sequential
    # expansion loop, superseded by generate_in_parallel below.
    '''
    for depth in range(2):
        F_t_plus_1 = []
        for t_i in transformations:
            for f_i in t_i.get_combinations(list(itertools.chain(*F[0:depth+1]))):
                if t_i.is_applicable(f_i):
                    current_feature = CandidateFeature(copy.deepcopy(t_i), f_i)
                    print(current_feature)
                    graph.add_node(str(current_feature))
                    graph.node[str(current_feature)]['feature'] = current_feature
                    for parent_feature in f_i:
                        graph.add_edge(str(parent_feature), str(current_feature))
                    F_t_plus_1.append(current_feature)
        F.append(F_t_plus_1)
    print(len(list(itertools.chain(*F))))
    #self.plot_graph(graph)
    '''

    # Three expansion rounds; each consumes everything generated so far.
    # NOTE(review): original indentation was lost — the progress print is
    # assumed to be inside the loop (per-depth progress); confirm upstream.
    for depth in range(3):
        results = self.generate_in_parallel(transformations, F[0:depth + 1])
        F.append(results)
        print(len(list(itertools.chain(*F))))