from protoml import Pipeline
from protoml.nodes.data import *
from protoml.nodes.sklearn import *
from protoml.nodes import MetricNode
from protoml.extras import visualize_pipeline

from sklearn import svm
from sklearn import datasets
from sklearn.metrics import f1_score, accuracy_score

if __name__ == "__main__":
    # Root protoml pipeline that the rest of the script populates.
    P = Pipeline()

    # Load the digits dataset; the final 25% of rows is held out for validation.
    digits = datasets.load_digits()
    n_holdout = int(digits.data.shape[0] * 0.25)

    # Split features/labels into train and validation partitions.
    train_X = digits.data[:-n_holdout]
    train_y = digits.target[:-n_holdout]
    valid_X = digits.data[-n_holdout:]
    valid_y = digits.target[-n_holdout:]

    # Register the data-source nodes on the pipeline.
    P.add_nodes(
        ("Training Data", LabeledTrainingDataNode(train_X, train_y)),
        ("Validation Data", LabeledTestDataNode(valid_X, valid_y)),
    )

    # create nodes using scikit-learn's support vector classification
from protoml.nodes import EstimatorNode,MetricNode,MachineEvaluatorNode
from protoml.viz import *
from protoml.feature import *
from protoml.extras import visualize_pipeline

from sklearn import svm
from sklearn import neighbors
from sklearn import cluster
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.metrics import f1_score, accuracy_score

# <codecell>

# NOTE(review): this section duplicates the pipeline/data setup inside the
# __main__ guard above — the file appears to be a notebook export spliced
# into the script; the two copies should be consolidated.

# Main protoml pipeline for the notebook-style section below.
P = Pipeline()

# Load the digits dataset; reserve the last 25% of rows as the test split.
digits = datasets.load_digits()
test = int(digits.data.shape[0] * 0.25)
# (Removed dead expression statement `digits.data.shape` — it was notebook
# cell-display residue and has no effect when run as a script.)

# Feature transform spec: standard-scale then PCA down to 32 dimensions.
# Tuple layout is presumably (name, column selector, transformer, fit flag) —
# TODO confirm against protoml.feature's expected transform format.
pca_ft = ("pca", "", PCA(n_components=32), True)

ft = Feature(digits.data)
ft.add_transforms([
                  ft_standardscaler(),
                  pca_ft