def data_science_pipeline() -> Pipeline:
    """Assemble the pipeline containing the ``fit-pca`` and ``fit-tsne`` nodes.

    Both nodes read the ``primary_classified_x`` dataset plus their own
    ``params:`` entry. ``fit-pca`` writes ``model_output_pca_x`` and
    ``model_output_pca_variance``; ``fit-tsne`` writes ``model_output_tsne_x``.

    Returns:
        A ``Pipeline`` holding the two nodes, in the order listed above.
    """
    pca_node = node(
        func=fit_pca,
        inputs={"x": "primary_classified_x", "kwargs": "params:fit_pca"},
        outputs={
            "x": "model_output_pca_x",
            "variance": "model_output_pca_variance",
        },
        name="fit-pca",
        tags="pca",
    )
    tsne_node = node(
        func=fit_tsne,
        inputs={"x": "primary_classified_x", "kwargs": "params:fit_tsne"},
        outputs="model_output_tsne_x",
        name="fit-tsne",
        tags="tsne",
    )
    return Pipeline(nodes=[pca_node, tsne_node])
def data_visualization_pipeline() -> Pipeline:
    """Assemble the pipeline containing the ``plot-pca`` and ``plot-tsne`` nodes.

    ``plot-pca`` reads the PCA model outputs and writes ``reporting_pca``;
    ``plot-tsne`` reads the t-SNE model output and writes ``reporting_tsne``.
    Both also read ``primary_classified_y``, ``params:metadata`` and their own
    ``params:`` entry.

    Returns:
        A ``Pipeline`` holding the two plotting nodes.
    """
    pca_plot_node = node(
        func=plot_pca,
        inputs={
            "x": "model_output_pca_x",
            "y": "primary_classified_y",
            "variance": "model_output_pca_variance",
            "metadata": "params:metadata",
            "kwargs": "params:plot_pca",
        },
        outputs="reporting_pca",
        name="plot-pca",
        tags="pca",
    )
    tsne_plot_node = node(
        func=plot_tsne,
        inputs={
            "x": "model_output_tsne_x",
            "y": "primary_classified_y",
            "metadata": "params:metadata",
            "kwargs": "params:plot_tsne",
        },
        outputs="reporting_tsne",
        name="plot-tsne",
        tags="tsne",
    )
    return Pipeline(nodes=[pca_plot_node, tsne_plot_node])
Example #3
0
def mock_pipeline() -> Pipeline:
    """Return a two-node pipeline tagged ``"pipeline"``.

    ``node1`` maps ``cars`` to ``planes`` and ``node2`` maps ``boats`` to
    ``ships``; both call ``identity_node``.
    """
    cars_to_planes = node(identity_node, "cars", "planes", name="node1")
    boats_to_ships = node(identity_node, "boats", "ships", name="node2")
    return Pipeline([cars_to_planes, boats_to_ships], tags="pipeline")
Example #4
0
 def _get_pipelines(self) -> Dict[str, Pipeline]:
     """Return a mapping whose ``__default__`` entry is a pipeline of two
     ``broken_node`` nodes (outputs ``A`` and ``B``), tagged ``"pipeline"``.
     """
     failing_nodes = [
         node(broken_node, None, "A", name="node1"),
         node(broken_node, None, "B", name="node2"),
     ]
     return {"__default__": Pipeline(failing_nodes, tags="pipeline")}
Example #5
0
 def _get_pipelines(self) -> Dict[str, Pipeline]:
     """Return a mapping whose ``__default__`` entry is a pipeline of two
     ``identity`` nodes (``cars`` -> ``planes``, ``boats`` -> ``ships``),
     tagged ``"pipeline"``.
     """
     passthrough_nodes = [
         node(identity, "cars", "planes", name="node1"),
         node(identity, "boats", "ships", name="node2"),
     ]
     return {"__default__": Pipeline(passthrough_nodes, tags="pipeline")}
Example #6
0
def broken_node():
    """Always fail by raising ``ValueError("broken")``.

    Raises:
        ValueError: unconditionally, with the message ``"broken"``.
    """
    failure = ValueError("broken")
    raise failure


def assert_exceptions_equal(e1: Exception, e2: Exception):
    """Assert that two exceptions match in type and message.

    Args:
        e1: The exception under test; must be an instance of ``e2``'s type
            (so a subclass instance is accepted).
        e2: The reference exception.

    Raises:
        AssertionError: if ``e1`` is not an instance of ``type(e2)`` or the
            ``str()`` forms of the two exceptions differ.
    """
    # The type check comes first so the messages are compared only when the
    # types are compatible, as in a short-circuiting ``and``.
    assert isinstance(e1, type(e2))
    assert str(e1) == str(e2)


@pytest.fixture
def dummy_dataframe():
    """Fixture providing a one-column DataFrame: column ``test`` with values 1 and 2."""
    frame = pd.DataFrame({"test": [1, 2]})
    return frame


# Two independent ``identity`` nodes (cars -> planes, boats -> ships), with the
# whole pipeline tagged "pipeline".
CONTEXT_PIPELINE = Pipeline(
    nodes=[
        node(func=identity, inputs="cars", outputs="planes", name="node1"),
        node(func=identity, inputs="boats", outputs="ships", name="node2"),
    ],
    tags="pipeline",
)

# Two input-less nodes that both call ``broken_node`` (outputs "A" and "B"),
# with the whole pipeline tagged "pipeline".
BROKEN_PIPELINE = Pipeline(
    nodes=[
        node(func=broken_node, inputs=None, outputs="A", name="node1"),
        node(func=broken_node, inputs=None, outputs="B", name="node2"),
    ],
    tags="pipeline",
)

# Lightweight record with ``project_name`` and ``version`` fields. Note that the
# generated class is named "Distinfo", not "MockDistInfo".
MockDistInfo = namedtuple(
    "Distinfo",
    ("project_name", "version"),
)
Example #7
0
def broken_node():
    """Node function that never succeeds.

    Raises:
        ValueError: on every call, carrying the message ``"broken"``.
    """
    message = "broken"
    raise ValueError(message)


def assert_exceptions_equal(e1: Exception, e2: Exception):
    """Check that ``e1`` is an instance of ``e2``'s type with the same message.

    Args:
        e1: Exception being checked.
        e2: Exception it is compared against.

    Raises:
        AssertionError: when the type check fails or ``str(e1) != str(e2)``.
    """
    type_matches = isinstance(e1, type(e2))
    # ``and`` short-circuits, so messages are compared only if the type matched.
    assert type_matches and str(e1) == str(e2)


@pytest.fixture
def dummy_dataframe():
    """Fixture returning a small DataFrame with a single ``test`` column holding 1 and 2."""
    return pd.DataFrame(data={"test": [1, 2]})


# Pipeline of two unconnected ``identity`` nodes (cars -> planes and
# boats -> ships), tagged "pipeline".
context_pipeline = Pipeline(
    nodes=[
        node(func=identity, inputs="cars", outputs="planes", name="node1"),
        node(func=identity, inputs="boats", outputs="ships", name="node2"),
    ],
    tags="pipeline",
)


class LoggingHooks:
    """A set of test hooks that only log information when invoked.

    Use a log queue to properly test log messages written by hooks invoked by
    ParallelRunner.
    """

    # Same string as the logger name requested in ``__init__``.
    handler_name = "hooks_handler"

    def __init__(self, logs_queue):
        # NOTE(review): ``logs_queue`` is not used in the lines visible here; the
        # class body appears to be cut off, so the queue is presumably attached
        # to a handler in code that is not shown -- confirm against the full file.
        self.logger = logging.getLogger("hooks_handler")
def data_engineering_pipeline() -> Pipeline:
    """Assemble the five-node data engineering pipeline.

    Node wiring, in order:

    * ``extract-image``: ``raw_matlab_image`` -> ``intermediate_image``
    * ``extract-ground-truth``: ``raw_matlab_ground_truth`` ->
      ``intermediate_ground_truth``
    * ``scale-image``: ``intermediate_image`` -> ``scale_image``
    * ``separate-classified-and-unclassified-samples``: splits the scaled
      image and ground truth into the four ``primary_*`` datasets
    * ``split-dataset``: turns the classified ``x``/``y`` datasets into the
      six ``model_input_classified_*`` datasets

    Every node is tagged ``pca``, ``tsne`` and ``tcn`` except
    ``split-dataset``, which is tagged ``tcn`` only.

    Returns:
        A ``Pipeline`` holding the five nodes.
    """
    extract_image = node(
        func=extract,
        inputs="raw_matlab_image",
        outputs="intermediate_image",
        name="extract-image",
        tags=["pca", "tsne", "tcn"],
    )
    extract_ground_truth = node(
        func=extract,
        inputs="raw_matlab_ground_truth",
        outputs="intermediate_ground_truth",
        name="extract-ground-truth",
        tags=["pca", "tsne", "tcn"],
    )
    scale_image = node(
        func=scale,
        inputs={"image": "intermediate_image", "kwargs": "params:scale"},
        outputs="scale_image",
        name="scale-image",
        tags=["pca", "tsne", "tcn"],
    )
    separate_samples = node(
        func=separate,
        inputs={
            "image": "scale_image",
            "ground_truth": "intermediate_ground_truth",
        },
        outputs={
            "classified_x": "primary_classified_x",
            "unclassified_x": "primary_unclassified_x",
            "classified_y": "primary_classified_y",
            "unclassified_y": "primary_unclassified_y",
        },
        name="separate-classified-and-unclassified-samples",
        tags=["pca", "tsne", "tcn"],
    )
    split_dataset = node(
        func=split,
        inputs={
            "x": "primary_classified_x",
            "y": "primary_classified_y",
            "kwargs": "params:split",
        },
        outputs={
            "x_train": "model_input_classified_x_train",
            "x_test": "model_input_classified_x_test",
            "x_valid": "model_input_classified_x_valid",
            "y_train": "model_input_classified_y_train",
            "y_test": "model_input_classified_y_test",
            "y_valid": "model_input_classified_y_valid",
        },
        name="split-dataset",
        tags="tcn",
    )
    return Pipeline(
        nodes=[
            extract_image,
            extract_ground_truth,
            scale_image,
            separate_samples,
            split_dataset,
        ]
    )