class Partition(HeavyTransformer):
    """Class to perform, e.g. Expand+kfoldCV, as a single transformer.

    This task is already done by function split(), but if performance
    becomes a concern, this less modular solution is a good choice.

    TODO: the current implementation is just an alias for the non-optimized
        previous solution (a Chain of Expand() and split()).

    Parameters
    ----------
    split_type
        One of 'cv', 'loo' or 'holdout'. Any other value raises ValueError.
    partitions
        Forwarded to split(). Not kept in the config for 'loo'.
    test_size
        Forwarded to split(). Not kept in the config for 'cv' and 'loo'.
    seed
        Forwarded to split(). Not kept in the config for 'loo'.
    fields
        Forwarded to split(). Defaults to ['X', 'Y'] when None.
    """

    def __init__(self, split_type='cv', partitions=10, test_size=0.3, seed=0,
                 fields=None):
        if fields is None:
            fields = ['X', 'Y']
        # NOTE: locals() has to be captured before any other local name is
        # bound here, otherwise that name would leak into the config.
        config = self._to_config(locals())

        # Config cleaning: drop the arguments that are irrelevant for the
        # chosen split type.
        if split_type == "cv":
            del config['test_size']
        elif split_type == "loo":
            # The original code unconditionally deleted 'partition' and
            # 'test', which are not constructor arguments, so it would fail
            # with KeyError. Keys are now removed only when present, and
            # 'test_size' (presumably what 'test' was meant to be) is
            # removed as well.
            for key in ('partitions', 'partition', 'test', 'test_size',
                        'seed'):
                if key in config:
                    del config[key]
        elif split_type == 'holdout':
            pass
        else:
            raise ValueError(f'Wrong split_type: {split_type!r}')

        super().__init__(config)

        # Imported locally, as in the original implementation
        # (presumably to avoid a circular import -- TODO confirm).
        from pjml.macro import split
        self.transformer = Chain(
            Expand(),
            split(split_type, partitions, test_size, seed, fields))

    def _apply_impl(self, data):
        """Apply the internal chain to `data` and wrap the result in a Model.

        The last `self.transformer.size` transformations recorded on the
        resulting data are replaced by this transformer's own first 'a'
        transformation. The model of the internal chain is stored in the
        returned Model as `splitter_model`.
        """
        splitter_model = self.transformer.apply(data)
        applied = splitter_model.data.last_transformations_replaced(
            drop=self.transformer.size,
            transformation=self.transformations('a')[0])
        return Model(self, data, applied, splitter_model=splitter_model)

    def _use_impl(self, data, splitter_model=None):
        """Use the previously applied internal chain on `data`.

        `splitter_model` is the model passed to Model by _apply_impl(); it
        must not be None despite the default value. The last
        `self.transformer.size` transformations recorded on the result are
        replaced by this transformer's own first 'u' transformation.
        """
        used = splitter_model.use(data)
        return used.last_transformations_replaced(
            drop=self.transformer.size,
            transformation=self.transformations('u')[0])

    @classmethod
    def _cs_impl(cls):
        """Not implemented for this transformer."""
        raise NotImplementedError
OnlyUse(Calc(functions=['flatten'])), OnlyUse(Report('flatten S ... S: $S')), OnlyUse(Calc(functions=['mean'])), OnlyUse(Report('mean S ... S: $S')), Report('End ...\n'), ) # diversidade, # Lambda(function='$R[0][0] * $R[0][1]', field='r') print('sample .................') pipe = full(rnd(expr, n=5), field='S', n=1).sample() # # pipes = rnd(expr, n=5) # # magia = Multi(pipes) -> Diversity() -> Agrega() # magia.apply() # coll = magia.use() # # pipe = full(pipes, field='S', n=1).sample() print('apply .................') data = Workflow(File("abalone3.arff"), Binarize()).apply().data c = Chain(pipe.wrapped, Report()) model = c.apply(data) print('use .................') dataout = model.use(data)
) # {history.last.config['function']} print(expr) print('sample .................') pipe = full(rnd(expr, n=10), field='S').sample() pipe.enable_pretty_printing() print(f'Pipe:\n{pipe}') print(f'Wrapped:\n{pipe.unwrap}') pipe = Chain(File('abalone3.arff'), Binarize(), Split(), pipe.unwrap, Metric(), Report()) print('apply .................') model = pipe.apply() # print(222222222222222, dataout.history) # data morre no apply() do predictor print('use .................') # print(3333333333333333, dataout.history) # RUS desaparece no use() exit(0) # # # AutoML ===================================================================