def test_cache(arq="iris.arff"):
    """Run a cached file-loading workflow and print each output's history by name."""
    workflow = Workflow(
        Cache(File(arq), storage_alias="default_sqlite"),
        Report("{history}"),
    )
    train, test = workflow.dual_transform()

    print("Train..............\n", train.history ^ "name")
    print("Test..........\n", test.history ^ "name")
def test_with_summ_reduce(arq="iris.arff"):
    """Exercise a Partition/Map/Summ/Reduce pipeline and print both long histories."""
    steps = (
        File(arq),
        Partition(),
        Map(PCA(), SVMC(), Metric()),
        Map(Report("<---------------------- etapa")),
        Summ(),
        Reduce(),
        Report("mean ... S: $S"),
    )
    pipeline = Workflow(*steps)
    train, test = pipeline.dual_transform()

    print("Train..............\n", train.history ^ "longname")
    print("Test..........\n", test.history ^ "longname")
def test_partition(arq="iris.arff"):
    """Build a partitioned pipeline with chained reports and print both outputs."""
    stages = (
        File(arq),
        Partition(),
        Map(PCA(), SVMC(), Metric(enhance=False)),
        Summ(function="mean", enhance=False),
        Reduce(),
        Report("mean ... S: $S", enhance=False),
        Report("$X"),
        Report("$y"),
    )
    pipeline = Workflow(*stages)
    train, test = pipeline.dual_transform()

    print("Train..............\n", train)
    print("Test..........\n", test)
def printing_test(arq="iris.arff"):
    """Print the textual representations of several container/expression objects.

    Purely a smoke test of __str__/printing; nothing is transformed.
    """
    print(Chain(Map(select(File(arq)))))

    workflow_expr = Workflow(
        File(arq),
        Partition(),
        Map(PCA(), SVMC(), Metric(enhance=False)),
        Map(Report("<---------------------- fold"), enhance=False),
        Summ(function="mean", enhance=False),
        Reduce(),
        Report("mean ... S: $S", enhance=False),
    )
    print(workflow_expr)

    # select(...) is printed twice on purpose: once inline, once via a variable.
    print(select(DT(), SVMC()))
    selection = select(DT(), SVMC())
    print(selection)

    print(Map(DT()))

    chain_expr = ChainCS(
        File(arq),
        Partition(),
        Map(PCA(), select(SVMC(), DT(criterion="gini")),
            Metric(enhance=False)),
        Report("teste"),
        Map(Report("<---------------------- fold")),
    )
    print(chain_expr)
def test_check_architecture(arq="iris.arff"):
    """Assert that enhancer/model outputs and dual_transform outputs share uuids."""
    pipe = Workflow(
        File(arq),
        Partition(partitions=2),
        Map(PCA(), SVMC(), Metric(enhance=False)),
        Summ(field="Y", function="mean", enhance=False),
    )

    # File sits at the front of the pipeline, so NoData is a valid input.
    train_01 = pipe.enhancer.transform(sd.NoData)
    test_01 = pipe.model(sd.NoData).transform(sd.NoData)
    train_02, test_02 = pipe.dual_transform(sd.NoData, sd.NoData)

    # Collection uuid depends on data, which depends on consumption.
    # Unpacking each result forces that consumption; the values are discarded.
    for first, *_ in (train_01, train_02, test_01, test_02):
        # print(111111111, first.y)
        pass

    assert train_01.uuid == train_02.uuid
    assert test_01.uuid == test_02.uuid
def ger_workflow(seed=0, arq="iris.arff"):
    """Build and return a seeded Workflow over the given ARFF file.

    The numpy RNG is seeded as a side effect so downstream sampling is
    reproducible for a given ``seed``.
    """
    np.random.seed(seed)

    return Workflow(
        File(arq),
        Partition(),
        Map(PCA(), select(SVMC(), DT(criterion="gini")),
            Metric(enhance=False)),
        Summ(function="mean", enhance=False),
        Reduce(),
        Report("Mean S: $S", enhance=False),
        seed=seed,
    )
def test_sequence_of_classifiers(arq="abalone.arff"):
    """Chain two classifiers (SVMC then DT) in one pipeline and print histories."""
    pipe = Workflow(
        File(arq),
        Binarize(),
        Report('1 {X.shape} {history^name}'),
        PCA(n=5),
        SVMC(),
        Metric(),
        Report('2 {X.shape} {history^name}'),
        DT(),
        Metric(),
        Report('3 {X.shape} {history^name}'),
    )

    print('Enh')
    train = pipe.enhancer.transform(sd.NoData)
    print('Mod')
    # TODO: why does the report not show up on the test side?
    test = pipe.model(sd.NoData).transform(sd.NoData)
    print()

    print("[test_sequence_of_classifiers] Train.........\n",
          train.history ^ "longname")
    print("[test_sequence_of_classifiers] Test..........\n",
          test.history ^ "longname")
def random_search(arq="iris.arff"):
    """Sample 10 pipelines from a workflow expression and optimize over 5 runs."""
    np.random.seed(0)

    search_space = Workflow(
        File(arq),
        Partition(),
        Map(PCA(), select(SVMC(), DT(criterion="gini")), Metric()),
        # Map(Report("<---------------------- fold"), enhance=False),
        Summ(function="mean"),
        Reduce(),
        Report("Mean S: $S"),
    )

    sampled = sample(search_space, n=10)
    best = optimize(sampled, n=5)
    best.disable_pretty_printing()
    print(best)
def test_check_architecture2(arq="iris.arff"):
    """Smoke-test every transform entry point accepted by a partitioned pipeline.

    Each call below deliberately overwrites the previous result — only the
    absence of exceptions is being checked, not the values.
    """
    pipe = Workflow(
        File(arq),
        Partition(),
        Map(PCA(), SVMC(), Metric(enhance=False)),
        Summ(field="Y", function="mean", enhance=False),
        Report("mean ... S: $S", enhance=False),
    )

    # File sits at the front of the pipeline, so NoData is a valid input.
    train_ = pipe.enhancer.transform(sd.NoData)
    test_ = pipe.model(sd.NoData).transform(sd.NoData)
    test_ = pipe.model(sd.NoData).transform((sd.NoData, sd.NoData))
    train_, test_ = pipe.dual_transform(sd.NoData, sd.NoData)
    train_, test_ = pipe.dual_transform(sd.NoData, (sd.NoData, sd.NoData))
def test_pca(arq="iris.arff"):
    """Run a Split/PCA/SVMC/Metric pipeline and print each output's history.

    Fix: removed the unused local ``cs = File(arq).cs`` — its value was never
    read. (If the ``.cs`` property access had a needed side effect, restore it.)
    """
    pipe = Workflow(File(arq), Split(), PCA(), SVMC(), Metric())
    train, test = pipe.dual_transform()
    print("Train..............\n", train.history ^ "name")
    print("Test..........\n", test.history ^ "name")
def test_metric(arq="iris.arff"):
    """Run Split/SVMC/Metric (metric on model side only) and print both results."""
    workflow = Workflow(File(arq), Split(), SVMC(), Metric(enhance=False))
    train, test = workflow.dual_transform()
    print("Train..............\n", train)
    print("Test..........\n", test)
def test_split(arq="iris.arff"):
    """Run a minimal Split/SVMC pipeline and print both outputs as strings."""
    workflow = Workflow(File(arq), Split(), SVMC())
    train, test = workflow.dual_transform()
    print("Train..............\n", str(train))
    print("Test..........\n", str(test))
def test_svmc(arq="iris.arff"):
    """Run a bare File/SVMC pipeline (no split) and print both outputs.

    Fix: removed the unused local ``cs = File(arq).cs`` — its value was never
    read. (If the ``.cs`` property access had a needed side effect, restore it.)
    """
    pipe = Workflow(File(arq), SVMC())
    train, test = pipe.dual_transform()
    print("Train..............\n", train)
    print("Test..........\n", test)