if __name__ == "__main__":
    # Entry point: assemble the nodes of the protoml example pipeline.
    P = Pipeline()

    # Load the digits dataset and reserve the trailing 25% of the samples
    # (rounded down) for validation; everything before that is training data.
    digits = datasets.load_digits()
    sample_count = digits.data.shape[0]
    test = int(sample_count * .25)

    # Data nodes: build them first, then register both under their names.
    training_node = LabeledTrainingDataNode(digits.data[:-test],
                                            digits.target[:-test])
    validation_node = LabeledTestDataNode(digits.data[-test:],
                                          digits.target[-test:])
    P.add_nodes(
        ("Training Data", training_node),
        ("Validation Data", validation_node),
    )

    # Classifier nodes: one support vector classifier per value of C on a
    # logarithmic grid. range(-1, 3) gives the exponents -1..2, i.e.
    # C = 0.1, 1, 10, 100.
    for exp in range(-1, 3):
        penalty = 10 ** exp
        # Wrap the SVC in a one-vs-rest node for multi-class classification.
        classifier_node = SklearnOneVsRestNode(svm.SVC, C=penalty)
        # Each node is named after its C value so it can be told apart from
        # the other SVM nodes.
        P.add_node("SVM %s" % penalty, classifier_node)

    # Scoring node: evaluates the classifiers with accuracy and F1.
    scorers = [accuracy_score, f1_score]
    P.add_node("Metrics", MetricNode(scorers, verbose=True))