def printing_test(arq="iris.arff"):
    print(Chain(Map(select(File(arq)))))
    exp = Workflow(
        File(arq),
        Partition(),
        Map(PCA(), SVMC(), Metric(enhance=False)),
        Map(Report("<---------------------- fold"), enhance=False),
        Summ(function="mean", enhance=False),
        Reduce(),
        Report("mean ... S: $S", enhance=False),
    )
    print(exp)
    print(select(DT(), SVMC()))

    sel = select(DT(), SVMC())
    print(sel)
    print(Map(DT()))
    exp = ChainCS(
        File(arq),
        Partition(),
        Map(PCA(), select(SVMC(), DT(criterion="gini")),
            Metric(enhance=False)),
        Report("teste"),
        Map(Report("<---------------------- fold")),
    )
    print(exp)

def ger_workflow(seed=0, arq="iris.arff"):
    np.random.seed(seed)

    workflow = Workflow(File(arq),
                        Partition(),
                        Map(PCA(), select(SVMC(), DT(criterion="gini")),
                            Metric(enhance=False)),
                        Summ(function="mean", enhance=False),
                        Reduce(),
                        Report("Mean S: $S", enhance=False),
                        seed=seed)

    return workflow
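
# A minimal usage sketch (not part of the original file): it assumes the
# same pjml imports as the surrounding tests and reuses only calls already
# shown in this file.
def demo_ger_workflow():
    train, test = ger_workflow(seed=0, arq="iris.arff").dual_transform()
    print("Train..............\n", train.history ^ "name")
    print("Test..........\n", test.history ^ "name")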

def test_with_summ_reduce(arq="iris.arff"):
    pipe = Workflow(
        File(arq),
        Partition(),
        Map(PCA(), SVMC(), Metric()),
        Map(Report("<---------------------- etapa")),
        Summ(),
        Reduce(),
        Report("mean ... S: $S"),
    )
    train, test = pipe.dual_transform()

    print("Train..............\n", train.history ^ "longname")
    print("Test..........\n", test.history ^ "longname")

def test_split_train_test(arq="iris.arff"):
    pipe = Cache(
        File(arq),
        # TsSplit should come before TrSplit to ensure the same original
        # data is used as input for both.
        TsSplit(),
        TrSplit(),
        PCA(),
        SVMC(),
        Metric(enhance=False),
        Report("metric ... R: $R", enhance=False),
        storage_alias="oka")
    train, test = pipe.dual_transform()

    print("Train..............\n", train)
    print("Test..........\n", test)

def test_partition(arq="iris.arff"):
    pipe = Workflow(
        File(arq),
        Partition(),
        Map(PCA(), SVMC(), Metric(enhance=False)),
        Summ(function="mean", enhance=False),
        Reduce(),
        Report("mean ... S: $S", enhance=False),
        Report("$X"),
        Report("$y"),
    )
    train, test = pipe.dual_transform()

    print("Train..............\n", train)
    print("Test..........\n", test)

def test_sequence_of_classifiers(arq="abalone.arff"):
    pipe = Workflow(
        File(arq),
        Binarize(),
        Report('1 {X.shape} {history^name}'),
        PCA(n=5),
        SVMC(),
        Metric(),
        Report('2 {X.shape} {history^name}'),
        DT(),
        Metric(),
        Report('3 {X.shape} {history^name}'),
    )
    print('Enhancer (train) phase')
    train = pipe.enhancer.transform(sd.NoData)
    print('Model (test) phase')
    test = pipe.model(sd.NoData).transform(
        sd.NoData)  # TODO: why doesn't Report show up in the test phase?
    print()

    print("[test_sequence_of_classifiers] Train.........\n",
          train.history ^ "longname")
    print("[test_sequence_of_classifiers] Test..........\n",
          test.history ^ "longname")

def test_check_architecture2(arq="iris.arff"):
    pipe = Workflow(
        File(arq),
        Partition(),
        Map(PCA(), SVMC(), Metric(enhance=False)),
        Summ(field="Y", function="mean", enhance=False),
        Report("mean ... S: $S", enhance=False),
    )

    # File is at the front of the pipeline, so no input data is needed.
    train_ = pipe.enhancer.transform(sd.NoData)
    test_ = pipe.model(sd.NoData).transform(sd.NoData)
    test_ = pipe.model(sd.NoData).transform((sd.NoData, sd.NoData))
    train_, test_ = pipe.dual_transform(sd.NoData, sd.NoData)
    train_, test_ = pipe.dual_transform(sd.NoData, (sd.NoData, sd.NoData))

def random_search(arq="iris.arff"):
    np.random.seed(0)
    exp = Workflow(
        File(arq),
        Partition(),
        Map(PCA(), select(SVMC(), DT(criterion="gini")), Metric()),
        # Map(Report("<---------------------- fold"), enhance=False),
        Summ(function="mean"),
        Reduce(),
        Report("Mean S: $S"),
    )

    expr = sample(exp, n=10)
    result = optimize(expr, n=5)
    result.disable_pretty_printing()
    print(result)

def test_check_architecture(arq="iris.arff"):
    pipe = Workflow(
        File(arq),
        Partition(partitions=2),
        Map(PCA(), SVMC(), Metric(enhance=False)),
        Summ(field="Y", function="mean", enhance=False),
    )

    # File is at the front of the pipeline, so no input data is needed.
    train_01 = pipe.enhancer.transform(sd.NoData)
    test_01 = pipe.model(sd.NoData).transform(sd.NoData)
    train_02, test_02 = pipe.dual_transform(sd.NoData, sd.NoData)

    # Collection uuid depends on data, which depends on consumption.
    for t, *_ in train_01, train_02, test_01, test_02:
        pass  # unpacking each collection forces its consumption

    assert train_01.uuid == train_02.uuid
    assert test_01.uuid == test_02.uuid
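
# Hedged sketch (assumption, not in the original file): uuids should be
# reproducible, since identically-seeded workflows consume the same data.
def test_uuid_reproducibility(arq="iris.arff"):
    train_1, test_1 = ger_workflow(seed=0, arq=arq).dual_transform()
    train_2, test_2 = ger_workflow(seed=0, arq=arq).dual_transform()
    assert train_1.uuid == train_2.uuid
    assert test_1.uuid == test_2.uuid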
Example #10
# cs = Pipeline(SelectKB)
# print(cs)
# exit()
#
# s = cs.sample()
# print(s)
# exit()

expr = Workflow(
    OnlyApply(File("abalone3.arff"), Binarize()),
    Partition(),
    Map(
        Wrap(
            select(SelectBest),
            ApplyUsing(select(DT, RF, NB)),
            OnlyApply(Metric(functions=['length'])),
            OnlyUse(Metric(functions=['accuracy', 'error'])),
            # AfterUse(Metric(function=['diversity']))
        ),
    ),
    Report('HISTORY ... S: {history}'),
    Summ(function='mean_std'),
    Report('mean and std ... S: $S'),
    OnlyApply(Copy(from_field="S", to_field="B")),
    OnlyApply(Report('copy S to B ... B: $B')),
    OnlyUse(
        MConcat(input_field1="B",
                input_field2="S",
                output_field="S",
                direction='vertical')),
    OnlyUse(Report('concat B with S (vertical) ... S: $S')),
    OnlyUse(Calc(functions=['flatten'])),
)
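
# Hedged continuation (not in the original, which is cut off above): a
# Workflow is consumed elsewhere in this file via dual_transform().
train, test = expr.dual_transform()
print(train, test)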
Example #11
from pjml.tool.data.modeling.supervised.classifier.svmc import SVMC
from pjml.tool.data.processing.feature.binarize import Binarize
from pjml.tool.data.processing.instance.sampler.over.random import OverS
from pjml.tool.meta.mfe import MFE

# ML 1 ========================================================================
# # Store the dataset without depending on the pjml package.
# from cururu.pickleserver import PickleServer
#
# try:
#     PickleServer().store(read_arff('iris.arff'))
# except DuplicateEntryException:
#     pass

pipe = Pipeline(
    Cache(File('bank.arff'), Binarize(), NB(), Metric(), Report('$X')))
print('apply .................')
m = pipe.apply()  # train phase
print(m.data)
print('use .................')
d = m.use()  # test phase
print(d)
exit()

#     # Source('messedup-dataset'),
#     Keep(evaluator(
#         Cache(
#             ApplyUsing(
#                 NB()
#             ),
#             Metric(function='accuracy')
Example #12
    print('Duplicate! Ignored.')

numpy.random.seed(50)
# import sklearn
# print('The scikit-learn version is {}.'.format(sklearn.__version__))
print('expr .................')
expr = Pipeline(
    OnlyApply(File('iris.arff')),
    Cache(
        evaluator(
            Wrap(
                shuffle(Std, MinMax),
                # shuffle(Std, select(UnderS, OverS), MinMax),
                ApplyUsing(select(DT, NB)),
            ),
            Metric(functions=['accuracy']))))

# {history.last.config['function']}
print(expr)
print('sample .................')
pipe = full(rnd(expr, n=10), field='S').sample()
pipe.enable_pretty_printing()
print(f'Pipe:\n{pipe}')
print(f'Wrapped:\n{pipe.unwrap}')
pipe = Chain(File('abalone3.arff'), Binarize(), Split(), pipe.unwrap,
             Metric(), Report())
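
# Hedged continuation (not in the original snippet, which stops here): the
# assembled pipeline would be applied as in Example #15 below.
print('apply .................')
model = pipe.apply()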

def test_pca(arq="iris.arff"):
    cs = File(arq).cs  # config space of the File component (unused here)
    pipe = Workflow(File(arq), Split(), PCA(), SVMC(), Metric())
    train, test = pipe.dual_transform()
    print("Train..............\n", train.history ^ "name")
    print("Test..........\n", test.history ^ "name")

def test_metric(arq="iris.arff"):
    pipe = Workflow(File(arq), Split(), SVMC(), Metric(enhance=False))
    train, test = pipe.dual_transform()
    print("Train..............\n", train)
    print("Test..........\n", test)
Example #15
    print('Duplicate! Ignored.')

numpy.random.seed(50)
# import sklearn
# print('The scikit-learn version is {}.'.format(sklearn.__version__))
print('expr .................')
expr = Workflow(
    OnlyApply(File('iris.arff')),
    Cache(
        evaluator(
            Wrap(
                shuffle(Std, MinMax),
                # shuffle(Std, select(UnderS, OverS), MinMax),
                ApplyUsing(select(DT, NB)),
            ),
            Metric(functions=['accuracy']))))

# {history.last.config['function']}
print(expr)
print('sample .................')
pipe = full(rnd(expr, n=10), field='S').sample()
pipe.enable_pretty_printing()
print(f'Pipe:\n{pipe}')
print(f'Wrapped:\n{pipe.unwrap}')
pipe = Chain(File('abalone3.arff'), Binarize(), Split(), pipe.unwrap, Metric(),
             Report())

print('apply .................')
model = pipe.apply()

# print(222222222222222, dataout.history)
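
# Hedged continuation (assumption): mirroring the apply/use pattern of
# Example #11 above.
print('use .................')
data = model.use()
print(data)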
Example #16
#             # AfterUse(Metric(function=['diversity']))
#         ),
#     ),
#     Summ(function='mean_std'),
#     Report('$S'),
# )

pipe = Pipeline(
    File("abalone3.arff"),
    Binarize(),
    Partition(),
    Map(
        UnderS(sampling_strategy='not minority'),
        # RF(),
        Cache(RF()),
        Metric()),
    Summ(function='mean_std'),
    Report('mean S --> \n$S'),
    Report('mean S --> $S'),
    OnlyApply(Copy(from_field="S", to_field="B")),
    OnlyUse(MConcat(fields=["S", "S"], output_field="S",
                    direction='vertical')),
    Calc(functions=['flatten']),
    Report('mean S --> $S'))

print('Applying...')
model = pipe.apply()
if model.data:
    for i, t in enumerate(model.data.history):
        print(f'hist {i}', t)
# exit()
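
# Hedged continuation (assumption): evaluate the fitted model on the
# use/test phase, as in the other examples in this file.
print('Using...')
data = model.use()
print(data)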