def printing_test(arq="iris.arff"):
    """Build several component compositions and print each of them.

    In order, this prints: a ``Chain`` wrapping a mapped selection over the
    file, a ``Workflow``, a ``select`` expression (once inline and once via
    a local variable), a ``Map`` over ``DT()``, and finally a ``ChainCS``.
    Nothing is returned.

    :param arq: value handed to ``File`` (defaults to ``"iris.arff"``).
    """
    fold_banner = "<---------------------- fold"

    # Smallest composition: select over the file, mapped, then chained.
    print(Chain(Map(select(File(arq)))))

    # Workflow with every optional ``enhance`` flag switched off.
    composed = Workflow(
        File(arq),
        Partition(),
        Map(PCA(), SVMC(), Metric(enhance=False)),
        Map(Report(fold_banner), enhance=False),
        Summ(function="mean", enhance=False),
        Reduce(),
        Report("mean ... S: $S", enhance=False),
    )
    print(composed)

    # The same selection printed directly and through a named variable.
    print(select(DT(), SVMC()))
    chosen = select(DT(), SVMC())
    print(chosen)

    print(Map(DT()))

    # ChainCS variant that nests a ``select`` inside the mapped steps.
    composed = ChainCS(
        File(arq),
        Partition(),
        Map(
            PCA(),
            select(SVMC(), DT(criterion="gini")),
            Metric(enhance=False),
        ),
        Report("teste"),
        Map(Report(fold_banner)),
    )
    print(composed)
def ger_workflow(seed=0, arq="iris.arff"):
    """Seed NumPy's global RNG and return a freshly built ``Workflow``.

    :param seed: passed to ``np.random.seed`` and forwarded to ``Workflow``
        as its ``seed`` keyword argument.
    :param arq: value handed to ``File`` (defaults to ``"iris.arff"``).
    :return: the constructed ``Workflow`` object.
    """
    np.random.seed(seed)

    # Steps are created in the same left-to-right order as the positional
    # arguments they become.
    steps = (
        File(arq),
        Partition(),
        Map(
            PCA(),
            select(SVMC(), DT(criterion="gini")),
            Metric(enhance=False),
        ),
        Summ(function="mean", enhance=False),
        Reduce(),
        Report("Mean S: $S", enhance=False),
    )
    return Workflow(*steps, seed=seed)
def random_search(arq="iris.arff"):
    """Build a workflow, run ``sample`` then ``optimize`` on it, print the result.

    NumPy's global RNG is seeded with 0 first. The workflow is passed to
    ``sample(..., n=10)``; what that returns is passed to
    ``optimize(..., n=5)``. Pretty printing is disabled on the outcome
    before it is printed. Nothing is returned.

    :param arq: value handed to ``File`` (defaults to ``"iris.arff"``).
    """
    np.random.seed(0)

    steps = (
        File(arq),
        Partition(),
        Map(PCA(), select(SVMC(), DT(criterion="gini")), Metric()),
        # Disabled step, kept for reference:
        # Map(Report("<---------------------- fold"), enhance=False),
        Summ(function="mean"),
        Reduce(),
        Report("Mean S: $S"),
    )
    search_space = Workflow(*steps)

    candidates = sample(search_space, n=10)
    outcome = optimize(candidates, n=5)
    outcome.disable_pretty_printing()
    print(outcome)
# exit() # # cs = Pipeline(SelectKB) # print(cs) # exit() # # s = cs.sample() # print(s) # exit() expr = Workflow( OnlyApply(File("abalone3.arff"), Binarize()), Partition(), Map( Wrap( select(SelectBest), ApplyUsing(select(DT, RF, NB)), OnlyApply(Metric(functions=['length'])), OnlyUse(Metric(functions=['accuracy', 'error'])), # AfterUse(Metric(function=['diversity'])) ), ), Report('HISTORY ... S: {history}'), Summ(function='mean_std'), Report('mean and std ... S: $S'), OnlyApply(Copy(from_field="S", to_field="B")), OnlyApply(Report('copy S to B ... B: $B')), OnlyUse( MConcat(input_field1="B", input_field2="S", output_field="S", direction='vertical')),
cache = partial(Cache, storage_alias='default_sqlite') # cache = partial(Cache, storage_alias='mysql') # cache = partial(Cache, storage_alias='default_dump') # cache = partial(Cache, storage_alias='amnesia') # expr = Pipeline(File(arq), cache(ApplyUsing(NB()))) # p = expr # p.apply() expr = Workflow( OnlyApply(File(arq), cache(Binarize())), cache( Partition(), Map( Wrap( select(SelectBest), # slow?? cache(ApplyUsing(select(DT, NB, hold(RF, n_estimators=40)))), OnlyApply(Metric(functions=['length'])), OnlyUse(Metric(functions=['accuracy', 'error'])), # AfterUse(Metric(function=['diversity'])) ), ), # Report('HISTORY ... S: {history}'), Summ(function='mean_std'), ), Report('mean and std ... S: $S'), OnlyApply(Copy(from_field="S", to_field="B")), OnlyApply(Report('copy S to B ... B: $B')), # OnlyUse(Report('>>>>>> B: {B.shape}')), # Report('>>>>>> S: {S.shape}'),
print('ok!') except DuplicateEntryException: print('Duplicate! Ignored.') numpy.random.seed(50) # import sklearn # print('The scikit-learn version is {}.'.format(sklearn.__version__)) print('expr .................') expr = Workflow( OnlyApply(File('iris.arff')), Cache( evaluator( Wrap( shuffle(Std, MinMax), # shuffle(Std, select(UnderS, OverS), MinMax), ApplyUsing(select(DT, NB)), ), Metric(functions=['accuracy'])))) # {history.last.config['function']} print(expr) print('sample .................') pipe = full(rnd(expr, n=10), field='S').sample() pipe.enable_pretty_printing() print(f'Pipe:\n{pipe}') print(f'Wrapped:\n{pipe.unwrap}') pipe = Chain(File('abalone3.arff'), Binarize(), Split(), pipe.unwrap, Metric(), Report()) print('apply .................') model = pipe.apply()