def data_science_pipeline() -> Pipeline:
    """Build the data science pipeline.

    The pipeline holds two nodes, ``fit-pca`` and ``fit-tsne``. Both read
    the ``primary_classified_x`` dataset as ``x`` and receive a ``kwargs``
    input bound to ``params:fit_pca`` / ``params:fit_tsne`` respectively.

    Returns:
        A ``Pipeline`` containing the PCA node followed by the t-SNE node.
    """
    # PCA fit: emits the transformed data and the variance as two outputs.
    pca_node = node(
        func=fit_pca,
        inputs={"x": "primary_classified_x", "kwargs": "params:fit_pca"},
        outputs={
            "x": "model_output_pca_x",
            "variance": "model_output_pca_variance",
        },
        name="fit-pca",
        tags="pca",
    )

    # t-SNE fit: single output dataset.
    tsne_node = node(
        func=fit_tsne,
        inputs={"x": "primary_classified_x", "kwargs": "params:fit_tsne"},
        outputs="model_output_tsne_x",
        name="fit-tsne",
        tags="tsne",
    )

    return Pipeline(nodes=[pca_node, tsne_node])
def data_visualization_pipeline() -> Pipeline:
    """Build the data visualization pipeline.

    The pipeline holds two plotting nodes, ``plot-pca`` and ``plot-tsne``.
    Each consumes the corresponding ``model_output_*`` dataset together
    with ``primary_classified_y`` and ``params:metadata``, and writes a
    ``reporting_*`` dataset.

    Returns:
        A ``Pipeline`` containing the PCA plot node followed by the t-SNE
        plot node.
    """
    # The PCA plot additionally takes the variance dataset as an input.
    pca_plot_inputs = {
        "x": "model_output_pca_x",
        "y": "primary_classified_y",
        "variance": "model_output_pca_variance",
        "metadata": "params:metadata",
        "kwargs": "params:plot_pca",
    }
    pca_plot_node = node(
        func=plot_pca,
        inputs=pca_plot_inputs,
        outputs="reporting_pca",
        name="plot-pca",
        tags="pca",
    )

    tsne_plot_inputs = {
        "x": "model_output_tsne_x",
        "y": "primary_classified_y",
        "metadata": "params:metadata",
        "kwargs": "params:plot_tsne",
    }
    tsne_plot_node = node(
        func=plot_tsne,
        inputs=tsne_plot_inputs,
        outputs="reporting_tsne",
        name="plot-tsne",
        tags="tsne",
    )

    return Pipeline(nodes=[pca_plot_node, tsne_plot_node])
def mock_pipeline() -> Pipeline:
    """Return a two-node pipeline tagged ``"pipeline"``.

    ``node1`` maps ``cars`` to ``planes`` and ``node2`` maps ``boats`` to
    ``ships``; both wrap ``identity_node``.
    """
    mock_nodes = [
        node(func=identity_node, inputs="cars", outputs="planes", name="node1"),
        node(func=identity_node, inputs="boats", outputs="ships", name="node2"),
    ]
    return Pipeline(nodes=mock_nodes, tags="pipeline")
def _get_pipelines(self) -> Dict[str, Pipeline]:
    """Return the pipeline registry with a single ``"__default__"`` entry.

    The default pipeline is tagged ``"pipeline"`` and consists of two
    input-less ``broken_node`` nodes, ``node1`` (output ``A``) and
    ``node2`` (output ``B``).
    """
    failing_nodes = [
        node(func=broken_node, inputs=None, outputs="A", name="node1"),
        node(func=broken_node, inputs=None, outputs="B", name="node2"),
    ]
    return {"__default__": Pipeline(nodes=failing_nodes, tags="pipeline")}
def _get_pipelines(self) -> Dict[str, Pipeline]:
    """Return the pipeline registry with a single ``"__default__"`` entry.

    The default pipeline is tagged ``"pipeline"`` and consists of two
    ``identity`` nodes: ``node1`` (``cars`` -> ``planes``) and ``node2``
    (``boats`` -> ``ships``).
    """
    passthrough_nodes = [
        node(func=identity, inputs="cars", outputs="planes", name="node1"),
        node(func=identity, inputs="boats", outputs="ships", name="node2"),
    ]
    return {"__default__": Pipeline(nodes=passthrough_nodes, tags="pipeline")}
def broken_node():
    """Fail unconditionally by raising ``ValueError("broken")``."""
    raise ValueError("broken")


def assert_exceptions_equal(e1: Exception, e2: Exception):
    """Assert that ``e1`` is an instance of ``e2``'s type with an equal message.

    The message comparison uses ``str()`` on both exceptions and is only
    reached when the type check has passed.
    """
    assert isinstance(e1, type(e2))
    assert str(e1) == str(e2)


@pytest.fixture
def dummy_dataframe():
    """Provide a DataFrame with a single ``test`` column holding 1 and 2."""
    columns = {"test": [1, 2]}
    return pd.DataFrame(columns)


# Two ``identity`` nodes (cars -> planes, boats -> ships), tagged "pipeline".
CONTEXT_PIPELINE = Pipeline(
    nodes=[
        node(func=identity, inputs="cars", outputs="planes", name="node1"),
        node(func=identity, inputs="boats", outputs="ships", name="node2"),
    ],
    tags="pipeline",
)

# Two input-less ``broken_node`` nodes (outputs A and B), tagged "pipeline".
BROKEN_PIPELINE = Pipeline(
    nodes=[
        node(func=broken_node, inputs=None, outputs="A", name="node1"),
        node(func=broken_node, inputs=None, outputs="B", name="node2"),
    ],
    tags="pipeline",
)

# NOTE(review): the namedtuple's typename is "Distinfo" while it is bound to
# ``MockDistInfo``; kept as-is since the typename shows up in ``repr``.
MockDistInfo = namedtuple("Distinfo", ["project_name", "version"])
def broken_node():
    """Fail unconditionally by raising ``ValueError("broken")``."""
    raise ValueError("broken")


def assert_exceptions_equal(e1: Exception, e2: Exception):
    """Assert that ``e1`` is an instance of ``e2``'s type with an equal message.

    The message comparison uses ``str()`` on both exceptions and is only
    reached when the type check has passed.
    """
    assert isinstance(e1, type(e2))
    assert str(e1) == str(e2)


@pytest.fixture
def dummy_dataframe():
    """Provide a DataFrame with a single ``test`` column holding 1 and 2."""
    columns = {"test": [1, 2]}
    return pd.DataFrame(columns)


# Two ``identity`` nodes (cars -> planes, boats -> ships), tagged "pipeline".
context_pipeline = Pipeline(
    nodes=[
        node(func=identity, inputs="cars", outputs="planes", name="node1"),
        node(func=identity, inputs="boats", outputs="ships", name="node2"),
    ],
    tags="pipeline",
)


class LoggingHooks:
    """Test hooks whose only job is to log information when they are invoked.

    A log queue is used so that log messages written by hooks invoked by
    ParallelRunner can be tested properly.
    """

    handler_name = "hooks_handler"

    def __init__(self, logs_queue):
        """Attach the ``"hooks_handler"`` logger to this instance.

        NOTE(review): ``logs_queue`` is accepted but not referenced in the
        code visible here -- confirm whether more of the initializer exists.
        """
        hooks_logger = logging.getLogger("hooks_handler")
        self.logger = hooks_logger
def data_engineering_pipeline() -> Pipeline:
    """Build the data engineering pipeline.

    Five nodes are registered, in this order: ``extract-image``,
    ``extract-ground-truth``, ``scale-image``,
    ``separate-classified-and-unclassified-samples`` and ``split-dataset``.
    The first four carry the tags ``pca``, ``tsne`` and ``tcn``; the last
    one is tagged ``tcn`` only.

    Returns:
        A ``Pipeline`` containing the five nodes listed above.
    """
    # Tags shared by every node except the final split; each node gets its
    # own fresh list built from this tuple.
    shared_tags = ("pca", "tsne", "tcn")

    extract_image = node(
        func=extract,
        inputs="raw_matlab_image",
        outputs="intermediate_image",
        name="extract-image",
        tags=list(shared_tags),
    )

    extract_ground_truth = node(
        func=extract,
        inputs="raw_matlab_ground_truth",
        outputs="intermediate_ground_truth",
        name="extract-ground-truth",
        tags=list(shared_tags),
    )

    scale_image = node(
        func=scale,
        inputs={"image": "intermediate_image", "kwargs": "params:scale"},
        outputs="scale_image",
        name="scale-image",
        tags=list(shared_tags),
    )

    # Consumes the scaled image and the ground truth; produces x/y datasets
    # for both the classified and the unclassified samples.
    separate_samples = node(
        func=separate,
        inputs={
            "image": "scale_image",
            "ground_truth": "intermediate_ground_truth",
        },
        outputs={
            "classified_x": "primary_classified_x",
            "unclassified_x": "primary_unclassified_x",
            "classified_y": "primary_classified_y",
            "unclassified_y": "primary_unclassified_y",
        },
        name="separate-classified-and-unclassified-samples",
        tags=list(shared_tags),
    )

    # Produces train/test/valid datasets for both x and y.
    split_dataset = node(
        func=split,
        inputs={
            "x": "primary_classified_x",
            "y": "primary_classified_y",
            "kwargs": "params:split",
        },
        outputs={
            "x_train": "model_input_classified_x_train",
            "x_test": "model_input_classified_x_test",
            "x_valid": "model_input_classified_x_valid",
            "y_train": "model_input_classified_y_train",
            "y_test": "model_input_classified_y_test",
            "y_valid": "model_input_classified_y_valid",
        },
        name="split-dataset",
        tags="tcn",
    )

    return Pipeline(
        nodes=[
            extract_image,
            extract_ground_truth,
            scale_image,
            separate_samples,
            split_dataset,
        ]
    )