def __init__(self, split_type='cv', partitions=10, test_size=0.3, seed=0,
             fields=None):
    """Store the cleaned config and build the internal Expand + split chain.

    Parameters
    ----------
    split_type
        Splitting strategy: 'cv', 'loo' or 'holdout'.
    partitions
        Forwarded to ``split()``; removed from the config for 'loo'.
    test_size
        Forwarded to ``split()``; removed from the config for 'cv' and
        'loo'.
    seed
        Forwarded to ``split()``; removed from the config for 'loo'.
    fields
        Forwarded to ``split()``; defaults to ``['X', 'Y']`` when None.

    Raises
    ------
    ValueError
        If ``split_type`` is not one of the supported strategies.
    """
    if fields is None:
        fields = ['X', 'Y']
    # Must run before any extra local is created: the config is derived
    # from the current locals().
    config = self._to_config(locals())

    # Config cleaning: drop the arguments that the strategy does not use.
    if split_type == "cv":
        del config['test_size']
    elif split_type == "loo":
        # Tolerant removal. This signature has no 'partition' or 'test'
        # argument, so a plain `del` on those keys raises KeyError
        # (assuming _to_config only mirrors the arguments -- TODO confirm).
        for unused_key in ('partitions', 'partition', 'test', 'test_size',
                           'seed'):
            config.pop(unused_key, None)
    elif split_type != 'holdout':
        raise ValueError(f'Wrong split_type: {split_type!r}')

    super().__init__(config)

    # Imported here rather than at module level, as in the original code
    # (presumably to avoid a circular import -- TODO confirm).
    from pjml.macro import split
    self.transformer = Chain(
        Expand(),
        split(split_type, partitions, test_size, seed, fields))
def __mul__(self, other):
    """Compose ``self * other`` into a single Chain.

    When ``other`` is a Chain/ChainCS its components are spliced in after
    ``self``; otherwise, when ``self`` is a Chain/ChainCS, its components
    are spliced in before ``other``; otherwise the two operands are simply
    paired.
    """
    from pjml.tool.chain import Chain
    from pjml.config.description.cs.chaincs import ChainCS

    chain_types = (Chain, ChainCS)
    if isinstance(other, chain_types):
        operands = (self, *other.components)
    elif isinstance(self, chain_types):
        operands = (*self.components, other)
    else:
        operands = (self, other)
    return Chain(*operands)
def __init__(self, split_type: str = "cv", partitions: int = 10,
             test_size: float = 0.3, seed: int = 0, fields: str = "X,Y",
             **kwargs):
    """Build the internal Repeat + split chain and store the cleaned config.

    Parameters
    ----------
    split_type
        Splitting strategy: "cv", "loo" or "holdout".
    partitions
        Forwarded to ``split()``; removed from the config for "loo".
    test_size
        Forwarded to ``split()``; removed from the config for "cv" and
        "loo".
    seed
        Forwarded to ``split()``; removed from the config for "loo".
    fields
        Forwarded to ``split()``.
    **kwargs
        Forwarded untouched to the parent constructor.

    Raises
    ------
    ValueError
        If ``split_type`` is not one of the supported strategies.
    """
    # Must stay the first statement: the config is derived from the
    # current locals(), so no extra local may exist yet.
    config = self._to_config(locals())

    # Config cleaning: drop the arguments that the strategy does not use.
    if split_type == "cv":
        del config["test_size"]
    elif split_type == "loo":
        # Tolerant removal. This signature has no "partition" or "test"
        # argument, so a plain `del` on those keys raises KeyError
        # (assuming _to_config only mirrors the arguments -- TODO confirm).
        for unused_key in ("partitions", "partition", "test", "test_size",
                           "seed"):
            config.pop(unused_key, None)
    elif split_type != "holdout":
        raise ValueError(f"Wrong split_type: {split_type!r}")

    # Imported here rather than at module level, as in the original code
    # (presumably to avoid a circular import -- TODO confirm).
    from pjml.macro import split
    self._component = Chain(
        Repeat(),
        split(split_type, partitions, test_size, seed, fields))

    super().__init__(config, **kwargs)
def printing_test(arq="iris.arff"):
    """Print the string form of several sample pipelines built on ``arq``.

    Nothing is applied or returned; each object is only constructed and
    passed to ``print``.
    """
    # A file wrapped in select / Map / Chain.
    file_selection = Chain(Map(select(File(arq))))
    print(file_selection)

    # A concrete workflow.
    workflow_steps = [
        File(arq),
        Partition(),
        Map(PCA(), SVMC(), Metric(enhance=False)),
        Map(Report("<---------------------- fold"), enhance=False),
        Summ(function="mean", enhance=False),
        Reduce(),
        Report("mean ... S: $S", enhance=False),
    ]
    print(Workflow(*workflow_steps))

    # The same selection is built and printed twice, as in the original.
    for _ in range(2):
        print(select(DT(), SVMC()))

    print(Map(DT()))

    # A config-space chain mixing fixed components with a selection.
    config_space_steps = [
        File(arq),
        Partition(),
        Map(
            PCA(),
            select(SVMC(), DT(criterion="gini")),
            Metric(enhance=False),
        ),
        Report("teste"),
        Map(Report("<---------------------- fold")),
    ]
    print(ChainCS(*config_space_steps))
def __init__(
    self,
    split_type: str = "holdout",
    partitions: int = 2,
    partition: int = 0,
    test_size: float = 0.3,
    seed: int = 0,
    fields: str = "X,Y",
    **kwargs,
):
    """Compose a TsSplit followed by a TrSplit, both built from the same arguments.

    Every named argument, plus ``**kwargs``, is handed unchanged to both
    splitters; ``**kwargs`` also goes to the parent constructor.
    """
    # Must stay the first statement: the config is derived from the
    # current locals(), so no extra local may exist yet.
    config = self._to_config(locals())

    splitter_args = dict(
        split_type=split_type,
        partitions=partitions,
        partition=partition,
        test_size=test_size,
        seed=seed,
        fields=fields,
        **kwargs,
    )
    train_splitter = TrSplit(**splitter_args)
    test_splitter = TsSplit(**splitter_args)

    # HINT: keep the test splitter first in the Chain; otherwise the input
    # data will differ for the two splitters.
    self._component = Chain(test_splitter, train_splitter)

    super().__init__(config, **kwargs)
def __init__(self, config, seed, transformers, deterministic):
    """Initialise the container and expose its content as ``self.transformer``.

    A single contained transformer is used directly; several of them are
    wrapped in one Chain.
    """
    super().__init__(config, seed, transformers, deterministic)

    # Implementation-wise, Container1(Chain(a,b,c)) is what makes
    # Container1(a,b,c) possible.
    if len(self.transformers) <= 1:
        self.transformer = self.transformers[0]
        return

    from pjml.tool.chain import Chain
    self.transformer = Chain(transformers=self.transformers)
class Partition(HeavyTransformer):
    """Class to perform, e.g. Expand+kfoldCV.

    This task is already done by function split(), but if performance
    becomes a concern, this less modular solution is a good choice.

    TODO: the current implementation is just an alias for the nonoptimized
        previous solution.
    """

    def __init__(self, split_type='cv', partitions=10, test_size=0.3,
                 seed=0, fields=None):
        """Store the cleaned config and build the Expand + split chain.

        Parameters
        ----------
        split_type
            Splitting strategy: 'cv', 'loo' or 'holdout'.
        partitions
            Forwarded to ``split()``; removed from the config for 'loo'.
        test_size
            Forwarded to ``split()``; removed from the config for 'cv'
            and 'loo'.
        seed
            Forwarded to ``split()``; removed from the config for 'loo'.
        fields
            Forwarded to ``split()``; defaults to ``['X', 'Y']`` when None.

        Raises
        ------
        ValueError
            If ``split_type`` is not one of the supported strategies.
        """
        if fields is None:
            fields = ['X', 'Y']
        # Must run before any extra local is created: the config is
        # derived from the current locals().
        config = self._to_config(locals())

        # Config cleaning: drop the arguments the strategy does not use.
        if split_type == "cv":
            del config['test_size']
        elif split_type == "loo":
            # Tolerant removal. This signature has no 'partition' or
            # 'test' argument, so a plain `del` on those keys raises
            # KeyError (assuming _to_config only mirrors the arguments
            # -- TODO confirm).
            for unused_key in ('partitions', 'partition', 'test',
                               'test_size', 'seed'):
                config.pop(unused_key, None)
        elif split_type != 'holdout':
            raise ValueError(f'Wrong split_type: {split_type!r}')

        super().__init__(config)

        # Imported here rather than at module level, as in the original
        # code (presumably to avoid a circular import -- TODO confirm).
        from pjml.macro import split
        self.transformer = Chain(
            Expand(),
            split(split_type, partitions, test_size, seed, fields))

    def _apply_impl(self, data):
        """Apply the internal chain to ``data`` and wrap the result in a Model.

        The resulting data has its last ``self.transformer.size``
        transformations replaced by this component's first 'a'
        transformation. The chain's own model is handed to the Model as
        ``splitter_model``.
        """
        splitter_model = self.transformer.apply(data)
        applied = splitter_model.data.last_transformations_replaced(
            drop=self.transformer.size,
            transformation=self.transformations('a')[0])
        return Model(self, data, applied, splitter_model=splitter_model)

    def _use_impl(self, data, splitter_model=None):
        """Run ``splitter_model.use`` on ``data`` and relabel the result.

        The last ``self.transformer.size`` transformations are replaced by
        this component's first 'u' transformation.
        NOTE(review): ``splitter_model`` defaults to None but is
        dereferenced unconditionally -- callers must always supply it.
        """
        used = splitter_model.use(data)
        return used.last_transformations_replaced(
            drop=self.transformer.size,
            transformation=self.transformations('u')[0])

    @classmethod
    def _cs_impl(cls):
        """Not implemented for this component."""
        raise NotImplementedError
def __init__(self, config, enhancer_cls, model_cls, seed, components,
             enhance, model, deterministic):
    """Initialise the container and expose its content as ``self.component``.

    A single contained component is used directly; several of them are
    wrapped in one Chain.
    """
    super().__init__(
        config,
        enhancer_cls,
        model_cls,
        seed,
        components,
        enhance,
        model,
        deterministic,
    )

    # Implementation-wise, Container1(Chain(a,b,c)) is what makes
    # Container1(a,b,c) possible.
    if len(self.components) <= 1:
        self.component = self.components[0]
        return

    from pjml.tool.chain import Chain
    self.component = Chain(components=self.components)
OnlyUse(Calc(functions=['flatten'])), OnlyUse(Report('flatten S ... S: $S')), OnlyUse(Calc(functions=['mean'])), OnlyUse(Report('mean S ... S: $S')), Report('End ...\n'), ) # diversidade, # Lambda(function='$R[0][0] * $R[0][1]', field='r') print('sample .................') pipe = full(rnd(expr, n=5), field='S', n=1).sample() # # pipes = rnd(expr, n=5) # # magia = Multi(pipes) -> Diversity() -> Agrega() # magia.apply() # coll = magia.use() # # pipe = full(pipes, field='S', n=1).sample() print('apply .................') data = Workflow(File("abalone3.arff"), Binarize()).apply().data c = Chain(pipe.wrapped, Report()) model = c.apply(data) print('use .................') dataout = model.use(data)
def sample(self):
    """Sample every component, in shuffled order, into a new Chain.

    The shuffle is done in place on a copy of ``self.components`` using
    NumPy's global random state, so ``self.components`` itself is not
    reordered.
    """
    import numpy as np
    from pjml.tool.chain import Chain

    shuffled_spaces = self.components.copy()
    np.random.shuffle(shuffled_spaces)
    sampled = [space.sample() for space in shuffled_spaces]
    return Chain(transformers=sampled)
def sample(self):
    """Return a Chain made of one sample from each of ``self.components``.

    The order of ``self.components`` is preserved.
    """
    from pjml.tool.chain import Chain

    sampled = [space.sample() for space in self.components]
    return Chain(components=sampled)
), Metric(functions=['accuracy']) ) ) ) # {history.last.config['function']} print(expr) print('sample .................') pipe = full(rnd(expr, n=10), field='S').sample() pipe.enable_pretty_printing() print(f'Pipe:\n{pipe}') print(f'Wrapped:\n{pipe.unwrap}') pipe = Chain(File('abalone3.arff'), Binarize(), Split(), pipe.unwrap, Metric(), Report()) print('apply .................') model = pipe.apply() # print(222222222222222, dataout.history) # data morre no apply() do predictor print('use .................') # print(3333333333333333, dataout.history) # RUS desaparece no use() exit(0)
def sample(self):
    """Return a Chain made of one sample from each of ``self.components``.

    The order of ``self.components`` is preserved.
    """
    from pjml.tool.chain import Chain

    sampled = [space.sample() for space in self.components]
    return Chain(transformers=sampled)
def evaluator(*components, function="mean_std", **validation_args):
    """Build a Partition -> Map -> Summ chain around ``components``.

    ``validation_args`` are forwarded to Partition, ``components`` to Map
    and ``function`` to Summ.
    """
    partitioner = Partition(**validation_args)
    mapper = Map(components=components)
    summarizer = Summ(function=function)
    return Chain(partitioner, mapper, summarizer)
def evaluator(*components, function='mean_std', **validation_args):
    """Build a Partition -> Map -> Summ chain around ``components``.

    ``validation_args`` are forwarded to Partition, ``components`` to Map
    (as ``transformers``) and ``function`` to Summ.
    """
    partitioner = Partition(**validation_args)
    mapper = Map(transformers=components)
    summarizer = Summ(function=function)
    return Chain(partitioner, mapper, summarizer)