if __name__ == "__main__":
    # Build the protoml pipeline that every node below is registered on.
    P = Pipeline()

    # Load the digits dataset and reserve the trailing quarter of the
    # samples (rounded down) for validation.
    digits = datasets.load_digits()
    test = int(digits.data.shape[0] * 0.25)
    training_rows = slice(None, -test)
    validation_rows = slice(-test, None)

    # Data nodes: one wraps the training split, the other the held-out
    # validation split. Features and targets are cut with the same slice
    # so they stay aligned.
    training_node = LabeledTrainingDataNode(
        digits.data[training_rows], digits.target[training_rows]
    )
    validation_node = LabeledTestDataNode(
        digits.data[validation_rows], digits.target[validation_rows]
    )
    P.add_nodes(
        ("Training Data", training_node),
        ("Validation Data", validation_node),
    )

    # One scikit-learn support vector classifier node per value of C, on a
    # logarithmic grid: C = 10 ** exp for exp in -1..2, i.e. 0.1, 1, 10, 100.
    # NOTE(review): the previous comment advertised 0.01 to 1000; if that
    # range was intended, the loop should be range(-2, 4) instead.
    for exp in range(-1, 3):
        penalty = 10 ** exp
        # The SVC is wrapped in a one-vs-rest node for multi-class
        # classification; the C value is embedded in the node name so each
        # classifier gets a distinct name.
        P.add_node("SVM %s" % penalty, SklearnOneVsRestNode(svm.SVC, C=penalty))

    # Node that scores the classifiers with accuracy and F1.
    P.add_node("Metrics", MetricNode([accuracy_score, f1_score], verbose=True))