# Create one node per value of the SVM regularisation parameter C, using
# scikit-learn's support vector classification. range(-1, 3) yields the
# exponents -1, 0, 1, 2, i.e. C = 0.1, 1, 10 and 100 (logarithmic spacing).
for exp in range(-1, 3):
    c_value = 10 ** exp
    # Use a one-vs-rest node for multi-class classification.
    # Each node is named after its C value ("SVM 0.1", "SVM 1", ...), so the
    # whole group can be selected by name pattern further down.
    P.add_node("SVM %s" % c_value, SklearnOneVsRestNode(svm.SVC, C=c_value))

# Create the node that scores the classifiers.
P.add_node("Metrics", MetricNode([accuracy_score, f1_score], verbose=True))

# Construct the DAG of the pipeline by defining layers using lists that are
# connected in full feedforward fashion. Nodes are looked up by name using
# regexes; the pattern is a raw string so that "\d" reaches the regex engine
# untouched instead of being parsed as an (invalid) string escape.
# NOTE(review): the node names contain a space after "SVM", so this pattern
# only selects them if the lookup is a prefix match -- confirm against the
# pipeline library.
P << [["Training Data", "Validation Data"], r"SVM\d*", "Metrics"]
# The validation data is additionally wired straight into the metrics node.
P << ["Validation Data", "Metrics"]

# DAG representation:
#
#                   ----> SVM 0.1 ---->
# Training Data     ----> SVM 1   ---->  Metrics <----|
# Validation Data   ----> SVM 10  ---->               |
#        |          ----> SVM 100 ---->               |
#        |                                            |
#        ----------------------------------------------

# Show the pipeline.
visualize_pipeline(P)

# Execute the pipeline.
P.run(timer=True, verbose=False)