Example #1
from io import BytesIO

import matplotlib.pyplot as plt
from prefect.artifacts import create_markdown


def make_plot(df):
    """Make a plot of the star counts and post it as an artifact"""
    ax = df.plot.line(title="GitHub Stars")
    ax.set(xlabel="Date", ylabel="Stars")
    fil = BytesIO()
    plt.savefig(fil, format="svg")
    fig_body = fil.getvalue().decode("utf-8")
    create_markdown(fig_body)
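A minimal usage sketch for the function above, assuming Prefect 1.x; the flow name and data are hypothetical:

import pandas as pd
from prefect import Flow, task


@task
def plot_star_history():
    # Hypothetical data; in practice this would come from the GitHub API
    df = pd.DataFrame(
        {"stars": [10, 25, 60]},
        index=pd.to_datetime(["2021-01-01", "2021-02-01", "2021-03-01"]),
    )
    make_plot(df)


with Flow("star-history") as flow:
    plot_star_history()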
Example #2
from prefect import artifacts, context


def test_create_markdown(client, running_with_backend):
    with context(task_run_id="trid"):
        artifact_id = artifacts.create_markdown(markdown="markdown_here")
        assert artifact_id == "id"
        assert client.create_task_run_artifact.called
        assert client.create_task_run_artifact.call_args[1] == {
            "data": {"markdown": "markdown_here"},
            "kind": "markdown",
            "task_run_id": "trid",
        }
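The `client` and `running_with_backend` fixtures are supplied by the surrounding test suite's conftest and are not shown here. A hypothetical sketch of what they could look like; the patch target `prefect.artifacts.Client` and the `running_with_backend` context key are assumptions, not confirmed internals:

from unittest.mock import MagicMock

import prefect
import pytest


@pytest.fixture
def client(monkeypatch):
    # Hypothetical: stub the backend client so create_task_run_artifact
    # returns the fixed id "id" that the test asserts against
    mock_client = MagicMock()
    mock_client.create_task_run_artifact.return_value = "id"
    monkeypatch.setattr("prefect.artifacts.Client", lambda: mock_client)
    return mock_client


@pytest.fixture
def running_with_backend():
    # Hypothetical: flag the run as executing against a Prefect backend
    with prefect.context(running_with_backend=True):
        yield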
Example #3
import textwrap

from prefect import artifacts


def make_markdown_artifact_dedent():
    mkdown = textwrap.dedent("""
    # This is markdown

    [apparently](https://prefect.io)

    Am I markdown?

    -----

    #### We'll find out
    """)
    artifact_id = artifacts.create_markdown(mkdown)
    print(artifact_id)
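`textwrap.dedent` strips the common leading whitespace that the indented triple-quoted string picks up, so the artifact is posted as real Markdown headings and rules rather than an indented code block. A quick illustration:

import textwrap

raw = """
    # Heading

    body text
    """
print(textwrap.dedent(raw))  # prints "# Heading" and "body text" flush left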
Example #4
import pytest

from prefect import artifacts, context


def test_old_create_markdown(client, running_with_backend):
    with context(task_run_id="trid"):
        with pytest.warns(
            UserWarning,
            match="has been moved to `prefect.backend.create_markdown_artifact`",
        ):
            artifact_id = artifacts.create_markdown(markdown="markdown_here")
        assert artifact_id == "id"
        assert client.create_task_run_artifact.called
        assert client.create_task_run_artifact.call_args[1] == {
            "data": {"markdown": "markdown_here"},
            "kind": "markdown",
            "task_run_id": "trid",
        }
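The warning pinned above names the replacement API. The non-deprecated call, assuming `prefect.backend.create_markdown_artifact` keeps the same single-argument signature:

from prefect.backend import create_markdown_artifact

artifact_id = create_markdown_artifact("# Hello from a task run")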
Example #5
from prefect.artifacts import create_markdown


def create_df_artifact(df):
    json_df = df.to_json()
    create_markdown(f"Transformed Dataframe:\n{json_df}")
Example #6
from prefect import artifacts, context


def test_create_markdown_not_using_backend(client):
    with context(task_run_id="trid"):
        artifact_id = artifacts.create_markdown(markdown="markdown_here")
        assert artifact_id is None
        assert not client.create_task_run_artifact.called
Example #7
    def run(
        self,
        checkpoint_name: str = None,
        context: "ge.DataContext" = None,
        assets_to_validate: list = None,
        batch_kwargs: dict = None,
        expectation_suite_name: str = None,
        context_root_dir: str = None,
        runtime_environment: Optional[dict] = None,
        run_name: str = None,
        run_info_at_end: bool = True,
        disable_markdown_artifact: bool = False,
        validation_operator: str = "action_list_operator",
        evaluation_parameters: Optional[dict] = None,
    ):
        """
        Task run method.

        Args:
            - checkpoint_name (str, optional): the name of the checkpoint; should match the filename of
                the checkpoint without .py
            - context (DataContext, optional): an in-memory GE DataContext object, e.g.
                `ge.data_context.DataContext()`. If not provided, `context_root_dir` will be used to
                look for one.
            - assets_to_validate (list, optional): A list of assets to validate when running the
                validation operator.
            - batch_kwargs (dict, optional): a dictionary of batch kwargs to be used when validating
                assets.
            - expectation_suite_name (str, optional): the name of an expectation suite to be used when
                validating assets.
            - context_root_dir (str, optional): the absolute or relative path to the directory holding
                your `great_expectations.yml`
            - runtime_environment (dict, optional): a dictionary of Great Expectations config key-value
                pairs to overwrite your config in `great_expectations.yml`
            - run_name (str, optional): the name of this Great Expectations validation run; defaults to
                the task slug
            - run_info_at_end (bool, optional): add run info to the end of the artifact generated by this
                task. Defaults to `True`.
            - disable_markdown_artifact (bool, optional): toggle the posting of a markdown artifact from
                this task. Defaults to `False`.
            - validation_operator (str, optional): configure the actions to be executed after running
                validation. Defaults to `action_list_operator`.
            - evaluation_parameters (Optional[dict], optional): the evaluation parameters to use when
                running validation. For more information, see
                [example](https://docs.prefect.io/api/latest/tasks/great_expectations.html#rungreatexpectationsvalidation)
                and
                [docs](https://docs.greatexpectations.io/en/latest/reference/core_concepts/evaluation_parameters.html).

        Raises:
            - `signals.FAIL` if the validation was not a success

        Returns:
            - result
                (`great_expectations.validation_operators.types.validation_operator_result.ValidationOperatorResult`):
                The Great Expectations metadata returned from the validation

        """

        runtime_environment = runtime_environment or dict()

        # Load context if not provided directly
        if not context:
            context = ge.DataContext(
                context_root_dir=context_root_dir,
                runtime_environment=runtime_environment,
            )

        # Check that exactly one of the mutually exclusive options was provided
        options = [
            expectation_suite_name and batch_kwargs,
            assets_to_validate,
            checkpoint_name,
        ]
        if sum(bool(x) for x in options) != 1:
            raise ValueError(
                "Exactly one of expectation_suite_name + batch_kwargs, assets_to_validate, or "
                "checkpoint_name is required to run validation."
            )

        # If assets are not provided directly through `assets_to_validate` then they need to be loaded:
        #   if a checkpoint_name is supplied, then load suite and batch_kwargs from there;
        #   otherwise get the batch from `batch_kwargs` and `expectation_suite_name`

        if not assets_to_validate:
            assets_to_validate = []
            if checkpoint_name:
                ge_checkpoint = context.get_checkpoint(checkpoint_name)

                for batch in ge_checkpoint["batches"]:
                    batch_kwargs = batch["batch_kwargs"]
                    for suite_name in batch["expectation_suite_names"]:
                        suite = context.get_expectation_suite(suite_name)
                        batch = context.get_batch(batch_kwargs, suite)
                        assets_to_validate.append(batch)
                validation_operator = ge_checkpoint["validation_operator_name"]
            else:
                assets_to_validate.append(
                    context.get_batch(batch_kwargs, expectation_suite_name))

        # Run validation operator
        results = context.run_validation_operator(
            validation_operator,
            assets_to_validate=assets_to_validate,
            run_id={"run_name": run_name or prefect.context.get("task_slug")},
            evaluation_parameters=evaluation_parameters,
        )

        # Generate artifact markdown
        if not disable_markdown_artifact:
            validation_results_page_renderer = (
                ge.render.renderer.ValidationResultsPageRenderer(
                    run_info_at_end=run_info_at_end
                )
            )
            rendered_document_content_list = (
                validation_results_page_renderer.render_validation_operator_result(
                    validation_operator_result=results
                )
            )
            markdown_artifact = " ".join(
                ge.render.view.DefaultMarkdownPageView().render(
                    rendered_document_content_list
                )
            )

            create_markdown(markdown_artifact)

        if results.success is False:
            raise signals.FAIL(result=results)

        return results
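A usage sketch for this `run` method, assuming the surrounding class is Prefect's `RunGreatExpectationsValidation` task; the flow name, checkpoint name, and project path are hypothetical:

from prefect import Flow
from prefect.tasks.great_expectations import RunGreatExpectationsValidation

validation_task = RunGreatExpectationsValidation()

with Flow("ge-validation") as flow:
    # Exactly one of the three option groups may be set; here the named
    # checkpoint supplies the suites and batch kwargs
    validation_task(
        checkpoint_name="my_checkpoint",
        context_root_dir="/path/to/great_expectations",
    )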
Example #8

    def run(self, readme, ref):
        artifact_id = artifacts.create_markdown(readme)
        return artifact_id
Example #9
    def run(
        self,
        checkpoint_name: str = None,
        context: "ge.DataContext" = None,
        assets_to_validate: list = None,
        batch_kwargs: dict = None,
        expectation_suite_name: str = None,
        get_checkpoint_from_context: bool = False,
        context_root_dir: str = None,
        runtime_environment: Optional[dict] = None,
        run_name: str = None,
        run_info_at_end: bool = True,
        disable_markdown_artifact: bool = False,
    ):
        """
        Task run method.

        Args:
            - checkpoint_name (str, optional): the name of the checkpoint; should match the filename of
                the checkpoint without .py
            - context (DataContext, optional): an in-memory GE DataContext object, e.g.
                `ge.data_context.DataContext()`. If not provided, `context_root_dir` will be used to
                look for one.
            - assets_to_validate (list, optional): A list of assets to validate when running the
                validation operator. If not provided then `batch_kwargs` and `expectation_suite_name`
                will be used if context is provided. Also, if not provided and
                `get_checkpoint_from_context` is True then the assets will be loaded from that context.
            - batch_kwargs (dict, optional): a dictionary of batch kwargs to be used when validating
                assets.
            - expectation_suite_name (str, optional): the name of an expectation suite to be used when
                validating assets.
            - get_checkpoint_from_context (bool, optional): get the checkpoint from context. Defaults to
                `False`
            - context_root_dir (str, optional): the absolute or relative path to the directory holding
                your `great_expectations.yml`
            - runtime_environment (dict, optional): a dictionary of Great Expectations config key-value
                pairs to overwrite your config in `great_expectations.yml`
            - run_name (str, optional): the name of this Great Expectations validation run; defaults to
                the task slug
            - run_info_at_end (bool, optional): add run info to the end of the artifact generated by this
                task. Defaults to `True`.
            - disable_markdown_artifact (bool, optional): toggle the posting of a markdown artifact from
                this task. Defaults to `False`.

        Raises:
            - `signals.FAIL` if the validation was not a success

        Returns:
            - result
                (`great_expectations.validation_operators.types.validation_operator_result.ValidationOperatorResult`):
                The Great Expectations metadata returned from the validation

        """
        if checkpoint_name is None:
            raise ValueError("You must provide the checkpoint name.")

        runtime_environment = runtime_environment or dict()

        # Load context if not provided directly
        if not context:
            context = ge.DataContext(
                context_root_dir=context_root_dir,
                runtime_environment=runtime_environment,
            )

        # If assets are not provided directly through `assets_to_validate` then they need to be loaded:
        #   if the checkpoint is being loaded from the context then load suite and batch from there;
        #   otherwise get the batch from `batch_kwargs` and `expectation_suite_name`
        if not assets_to_validate:
            assets_to_validate = []
            if get_checkpoint_from_context:
                ge_checkpoint = context.get_checkpoint(checkpoint_name)

                for batch in ge_checkpoint["batches"]:
                    batch_kwargs = batch["batch_kwargs"]
                    for suite_name in batch["expectation_suite_names"]:
                        suite = context.get_expectation_suite(suite_name)
                        batch = context.get_batch(batch_kwargs, suite)
                        assets_to_validate.append(batch)
            else:
                assets_to_validate.append(
                    context.get_batch(batch_kwargs, expectation_suite_name)
                )

        # Run validation operator
        results = context.run_validation_operator(
            checkpoint_name or ge_checkpoint["validation_operator_name"],
            assets_to_validate=assets_to_validate,
            run_id={"run_name": run_name or prefect.context.get("task_slug")},
        )

        if results.success is False:
            raise signals.FAIL(result=results)

        # Generate artifact markdown
        if not disable_markdown_artifact:
            validation_results_page_renderer = (
                ge.render.renderer.ValidationResultsPageRenderer(
                    run_info_at_end=run_info_at_end
                )
            )
            rendered_document_content_list = (
                validation_results_page_renderer.render_validation_operator_result(
                    validation_operator_result=results
                )
            )
            markdown_artifact = " ".join(
                ge.render.view.DefaultMarkdownPageView().render(
                    rendered_document_content_list
                )
            )

            create_markdown(markdown_artifact)

        return results
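Compared with Example #7, this earlier variant always requires `checkpoint_name` and only loads suites and batches from the project when `get_checkpoint_from_context=True`. A matching call, with the same hypothetical names as above:

from prefect import Flow
from prefect.tasks.great_expectations import RunGreatExpectationsValidation

validation_task = RunGreatExpectationsValidation()

with Flow("ge-validation-legacy") as flow:
    validation_task(
        checkpoint_name="my_checkpoint",
        get_checkpoint_from_context=True,
        context_root_dir="/path/to/great_expectations",
    )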