def main():
    """Train, save, reload and evaluate a PCA + linear SVC pipeline.

    Steps:
      1. Load the breast cancer dataset and hold out 20% of it for testing
         (``random_state=42`` keeps the split reproducible).
      2. Fit a ``Pca(n_components=0.95)`` -> ``LinearSvc(C=1.0)`` pipeline on
         the training split, print the training AUC and save the pipeline
         under ``/tmp/pipeline_example``.
      3. Load the saved state into fresh ``Pca`` / ``LinearSvc`` steps, put a
         ``PipelineSaver`` between them, run the resulting pipeline on the
         test split and print the test AUC.
    """
    data = load_breast_cancer()
    features = data['data']
    labels = data['target']

    features_train, features_test, labels_train, labels_test = train_test_split(
        features, labels, test_size=0.2, random_state=42)

    x_train = {'features': features_train, 'labels': labels_train}
    x_test = {'features': features_test, 'labels': labels_test}

    save_path = '/tmp/pipeline_example'
    intermediate_features_path = os.path.join(save_path, 'features')

    # --- Training: fit the pipeline and score it on the data it was fit on.
    pipeline_train = Pipeline(
        'Pca_and_linear_svc_pipeline',
        [Pca(n_components=0.95), LinearSvc(C=1.0)])
    print('Training pipeline description: ', pipeline_train)

    y_train = pipeline_train.fit_run(x_train)
    print('AUC for training set: {}'.format(
        str(roc_auc_score(y_train['labels'], y_train['scores']))))

    pipeline_train.save(save_path)

    # --- Testing: rebuild the steps from the saved state.
    pca = Pca()
    pca.load(save_path)
    svc = LinearSvc()
    svc.load(save_path)

    # The PipelineSaver sits between PCA and the classifier; presumably it
    # persists the PCA-transformed test features under
    # intermediate_features_path -- confirm against PipelineSaver.
    pipeline_test = Pipeline(
        'Pca_and_linear_svc_pipeline',
        [pca,
         PipelineSaver(intermediate_features_path, 'PCA_test_features'),
         svc])
    pipeline_test.load(save_path)
    print('Test pipeline description: ', pipeline_test)

    y_test = pipeline_test.run(x_test)
    # Fixed label: this score is computed on the held-out test split (the
    # original message said "training set").
    print('AUC for test set: {}'.format(
        str(roc_auc_score(y_test['labels'], y_test['scores']))))
def test_run_pipeline_saver(self):
    """Check a saved RFE + PipelineSaver pipeline via a PipelineLoader.

    A pipeline made of ``RecursiveFeatureElimination(n_features=1)`` followed
    by a ``PipelineSaver`` is fit on ``self.X`` and saved. A second pipeline
    containing only a ``PipelineLoader`` (same two 'dim_reduction' arguments)
    is then loaded from the same directory and run on ``self.X``; its output
    must match the expected features, labels, access ids and indices.
    """
    pipeline_dir = '/tmp/test_rfe_pipeline_saver'

    # Fit and persist the feature-selection pipeline.
    selector = RecursiveFeatureElimination(n_features=1)
    saver = PipelineSaver('dim_reduction', 'dim_reduction')
    fitted = Pipeline('name_pipeline', [selector, saver])
    fitted.fit(self.X)
    fitted.save(pipeline_dir)

    # Rebuild a pipeline that only reads back through a PipelineLoader.
    loader = PipelineLoader('dim_reduction', 'dim_reduction')
    restored = Pipeline('name_pipeline', [loader])
    restored.load(pipeline_dir)
    Y = restored.run(self.X)

    expectations = (
        ('features', np.array([[0.2], [0.1], [0.1], [0.2], [0.9], [0.8]])),
        ('labels', np.array([0, 0, 0, 0, 1, 1])),
        ('access_ids', np.array([0, 0, 1, 1, 2, 2])),
        ('indices', np.array([2])),  # indices of the selected features
    )
    for key, expected in expectations:
        np.testing.assert_equal(Y[key], expected)