예제 #1
0
class Partition(HeavyTransformer):
    """Expand the data and split it in one transformer (e.g. Expand+kfoldCV).

    This task is already done by function split(),
    but if performance becomes a concern, this less modular solution is a
    good choice.

    Internally it wraps ``Chain(Expand(), split(...))`` and rewrites the
    history of the resulting data so that the steps of the internal chain
    are replaced by a single transformation of this object.

    TODO: the current implementation is just an alias for the nonoptimized
        previous solution.
    """
    def __init__(self,
                 split_type='cv',
                 partitions=10,
                 test_size=0.3,
                 seed=0,
                 fields=None):
        """Build the configuration and the internal Expand+split chain.

        Parameters
        ----------
        split_type
            One of 'cv', 'loo' or 'holdout'.
        partitions
            Forwarded to ``split``; removed from the config for 'loo'.
        test_size
            Forwarded to ``split``; removed from the config for 'cv' and
            'loo'.
        seed
            Forwarded to ``split``; removed from the config for 'loo'.
        fields
            Fields forwarded to ``split``. Defaults to ``['X', 'Y']``.

        Raises
        ------
        ValueError
            If ``split_type`` is not one of the accepted values.
        """
        if fields is None:
            fields = ['X', 'Y']
        # NOTE: locals() is captured here on purpose; do not create new local
        # variables above this line, or they would leak into the config.
        config = self._to_config(locals())

        # config cleaning: drop the arguments that are meaningless for the
        # chosen split type.
        if split_type == "cv":
            del config['test_size']
        elif split_type == "loo":
            # The former code deleted 'partition' and 'test', which are not
            # parameters of this class, so creating a 'loo' Partition raised
            # KeyError. pop() with a default keeps the cleanup tolerant.
            for irrelevant in ('partitions', 'test_size', 'seed'):
                config.pop(irrelevant, None)
        elif split_type == 'holdout':
            pass
        else:
            raise ValueError(f'Wrong split_type: {split_type!r}')

        super().__init__(config)
        # Imported locally rather than at module level
        # (presumably to avoid a circular import -- TODO confirm).
        from pjml.macro import split
        self.transformer = Chain(
            Expand(), split(split_type, partitions, test_size, seed, fields))

    def _apply_impl(self, data):
        """Apply the internal chain and return a Model wrapping its result.

        The data produced by the internal chain has its last
        ``self.transformer.size`` transformations replaced by the first
        'a' transformation of this object. The internal model is stored in
        the returned Model as ``splitter_model`` so that ``_use_impl`` can
        receive it later.
        """
        splitter_model = self.transformer.apply(data)
        applied = splitter_model.data.last_transformations_replaced(
            drop=self.transformer.size,
            transformation=self.transformations('a')[0])

        return Model(self, data, applied, splitter_model=splitter_model)

    def _use_impl(self, data, splitter_model=None):
        """Use the model of the internal chain on ``data``.

        ``splitter_model`` is the model created by ``_apply_impl``; although
        it defaults to None, it is dereferenced unconditionally, so it must
        be provided. The resulting data has its last
        ``self.transformer.size`` transformations replaced by the first
        'u' transformation of this object.
        """
        used = splitter_model.use(data)
        return used.last_transformations_replaced(
            drop=self.transformer.size,
            transformation=self.transformations('u')[0])

    @classmethod
    def _cs_impl(cls):
        """Not implemented for this transformer."""
        raise NotImplementedError
예제 #2
0
    OnlyUse(Calc(functions=['flatten'])),
    OnlyUse(Report('flatten S ... S: $S')),
    OnlyUse(Calc(functions=['mean'])),
    OnlyUse(Report('mean S ... S: $S')),
    Report('End ...\n'),
)

# diversity,
# Lambda(function='$R[0][0] * $R[0][1]', field='r')

# --- Sample phase: draw one pipeline ----------------------------------------
print('sample .................')
# `expr`, `full` and `rnd` are defined outside this snippet.
pipe = full(rnd(expr, n=5), field='S', n=1).sample()

# Commented-out alternative (not executed); 'magia' is Portuguese for 'magic':
#
# pipes = rnd(expr, n=5)
#
# magia = Multi(pipes) -> Diversity() -> Agrega()
# magia.apply()
# coll = magia.use()
#
# pipe = full(pipes, field='S', n=1).sample()

# --- Apply phase -------------------------------------------------------------
print('apply .................')
# Apply a File+Binarize workflow and keep the `.data` of the result.
data = Workflow(File("abalone3.arff"), Binarize()).apply().data

# Chain the sampled pipeline's `wrapped` attribute with a Report step.
c = Chain(pipe.wrapped, Report())
model = c.apply(data)

# --- Use phase ---------------------------------------------------------------
print('use .................')
# NOTE(review): the model is used on the same `data` object it was applied
# to; `dataout` is not read again within this snippet.
dataout = model.use(data)
예제 #3
0
)



# {history.last.config['function']}
# `expr`, `full` and `rnd` are defined outside this snippet.
print(expr)
# --- Sample phase: draw one pipeline and show it -----------------------------
print('sample .................')
pipe = full(rnd(expr, n=10), field='S').sample()
pipe.enable_pretty_printing()
print(f'Pipe:\n{pipe}')
# NOTE(review): `unwrap` is accessed without parentheses here and below, so it
# is presumably a property -- confirm. Another example uses `pipe.wrapped`.
print(f'Wrapped:\n{pipe.unwrap}')
# Rebind `pipe` to a full chain built around the sampled pipeline.
pipe = Chain(File('abalone3.arff'), Binarize(), Split(), pipe.unwrap,
             Metric(), Report())

# --- Apply phase -------------------------------------------------------------
print('apply .................')
model = pipe.apply()

# print(222222222222222, dataout.history)
# data dies in the predictor's apply()


# NOTE(review): only the banner is printed; no use() call follows it in the
# visible code.
print('use .................')


# print(3333333333333333, dataout.history)
# RUS disappears in use()

# Stop the script here; nothing after this line is executed.
exit(0)

#
# # AutoML ===================================================================