# data nodes
    # The last `test` samples are held out for validation and everything
    # before them is used for training (assumes `test` is a positive int:
    # with test == 0 the training slice would be empty).
    training_node = LabeledTrainingDataNode(digits.data[:-test],
                                            digits.target[:-test])
    validation_node = LabeledTestDataNode(digits.data[-test:],
                                          digits.target[-test:])
    P.add_nodes(
        ("Training Data", training_node),
        ("Validation Data", validation_node),
    )

    # One scikit-learn support-vector classifier node per value of C on a
    # logarithmic grid: exponents -1..2 give C = 0.1, 1, 10 and 100.
    for exp in range(-1, 3):
        c_value = 10 ** exp
        # each SVC is wrapped in a one-vs-rest node for multi-class
        # classification; the C value is embedded in the node name
        # ("SVM 0.1", "SVM 1", ...) so the node can be found by name later
        svm_node = SklearnOneVsRestNode(svm.SVC, C=c_value)
        P.add_node("SVM %s" % c_value, svm_node)

    # scoring node for the classifiers; it is handed accuracy_score and
    # f1_score as its metric functions
    scorers = [accuracy_score, f1_score]
    P.add_node("Metrics", MetricNode(scorers, verbose=True))

    # Construct the DAG of the pipeline by defining layers as lists; adjacent
    # layers are connected in full feed-forward fashion.  Strings are used as
    # regexes to find nodes by name, hence the raw string below (a plain "\d"
    # is an invalid escape sequence and triggers a warning on newer Pythons).
    # NOTE(review): node names look like "SVM 0.1" / "SVM 10", so r"SVM\d*"
    # can only select them if the pipeline does prefix/search matching rather
    # than a full match -- confirm against the pipeline's lookup code.
    P << [["Training Data", "Validation Data"], r"SVM\d*", "Metrics"]
    # additionally connect the validation data directly to the metrics node
    P << ["Validation Data", "Metrics"]
    # Dag Representation:
    #                      ---->  SVM 0.1   ---->
    #     Training Data    ---->  SVM 1     ---->   Metrics     <-------|
    #     Validation Data  ---->  SVM 10    ---->                       |
    #            |         ---->  SVM 100   ---->                       |
    # node for training data
    ("Training Data", LabeledTrainingDataNode(ft["pca.*"][:-test],
                                              digits.target[:-test])),
    # node for validation data
    ("Validation Data", LabeledTestDataNode(ft["pca.*"][-test:],
                                            digits.target[-test:])),
)

# <codecell>

# One scikit-learn support-vector classifier node per value of C on a
# logarithmic grid: exponents -1..2 give C = 0.1, 1, 10 and 100.
for exp in range(-1, 3):
    c_value = 10 ** exp
    # one-vs-rest wrapper for multi-class classification; the C value is part
    # of the node name ("Machine SVM 0.1", "Machine SVM 1", ...)
    svm_node = SklearnOneVsRestNode(svm.SVC, C=c_value)
    P.add_node("Machine SVM %s" % c_value, svm_node)

# scikit-learn's k-nearest-neighbours classifier, wrapped one-vs-rest
knn_node = SklearnOneVsRestNode(
    neighbors.KNeighborsClassifier,
    warn_on_equidistant=False,
)
P.add_node("Machine KNN", knn_node)

# scikit-learn's k-means estimator, configured with two clusters
kmeans_node = EstimatorNode(cluster.KMeans, n_clusters=2)
P.add_node("Machine K-means", kmeans_node)

# "mass evaluator" node -- presumably evaluates all machine nodes wired into
# it; confirm against MachineEvaluatorNode's definition
evaluator_node = MachineEvaluatorNode()
P.add_node("Evaluator", evaluator_node)

# <codecell>

# cross-validation node, given accuracy_score as its scoring function
# NOTE(review): described as 3-fold, but no fold count is passed here, so it
# presumably comes from the node's default -- confirm.
cv_node = SklearnCrossValidationNode(
    accuracy_score,
    score_weight=-1,
    verbose=True,
    top_k=4,
)
P.add_node("Cross Validation", cv_node)

# visualization node, to inspect the cross-validation scores
P.add_node("Visualize CV", CrossValidationVisualizationNode())

# final metric