from protoml import Pipeline
from protoml.nodes.data import *
from protoml.nodes.sklearn import *
from protoml.nodes import MetricNode
from protoml.extras import visualize_pipeline
from sklearn import svm
from sklearn import datasets
from sklearn.metrics import f1_score, accuracy_score

if __name__ == "__main__":
    # Build the main protoml pipeline that the nodes below are registered into.
    # NOTE(review): protoml is a project-local package; node semantics
    # (Pipeline, LabeledTrainingDataNode, LabeledTestDataNode) are assumed
    # from naming — confirm against the protoml source.
    P = Pipeline()

    # Load the sklearn digits dataset (8x8 handwritten-digit images).
    digits = datasets.load_digits()
    # Hold out the last 25% of samples for validation; `test` is the
    # number of held-out rows.
    test = int(digits.data.shape[0] * .25)

    # Construct all nodes to be used.
    # Data nodes: first 75% of rows/labels for training, last 25% for
    # validation. Assumes add_nodes takes (name, node) pairs — TODO confirm.
    P.add_nodes(
        # node for training data
        ("Training Data", LabeledTrainingDataNode(digits.data[:-test], digits.target[:-test])),
        # node for validation data
        ("Validation Data", LabeledTestDataNode(digits.data[-test:], digits.target[-test:])),
    )
    # create nodes using scikit-learn's support vector classification
from protoml.nodes import EstimatorNode,MetricNode,MachineEvaluatorNode from protoml.viz import * from protoml.feature import * from protoml.extras import visualize_pipeline from sklearn import svm from sklearn import neighbors from sklearn import cluster from sklearn import datasets from sklearn.decomposition import PCA from sklearn.metrics import f1_score, accuracy_score # <codecell> # main protoml pipeline P = Pipeline() # load data digits = datasets.load_digits() test = int(digits.data.shape[0] * .25) digits.data.shape # <codecell> # zero score and pca down to 32 dimensions pca_ft = ("pca", "", PCA(n_components=32), True) ft = Feature(digits.data) ft.add_transforms([ ft_standardscaler(), pca_ft