Example #1
def materialize(_):
    yield Materialization(
        label='all_types',
        description='a materialization with all metadata types',
        metadata_entries=[
            EventMetadataEntry.text('text is cool', 'text'),
            EventMetadataEntry.url('https://bigty.pe/neato', 'url'),
            EventMetadataEntry.fspath('/tmp/awesome', 'path'),
            EventMetadataEntry.json({'is_dope': True}, 'json'),
            EventMetadataEntry.python_artifact(EventMetadataEntry, 'python class'),
            EventMetadataEntry.python_artifact(file_relative_path, 'python function'),
            EventMetadataEntry.float(1.2, 'float'),
        ],
    )
    yield Output(None)
Example #2
def materialize(_):
    yield AssetMaterialization(
        asset_key="all_types",
        description="a materialization with all metadata types",
        metadata_entries=[
            EventMetadataEntry.text("text is cool", "text"),
            EventMetadataEntry.url("https://bigty.pe/neato", "url"),
            EventMetadataEntry.fspath("/tmp/awesome", "path"),
            EventMetadataEntry.json({"is_dope": True}, "json"),
            EventMetadataEntry.python_artifact(EventMetadataEntry, "python class"),
            EventMetadataEntry.python_artifact(file_relative_path, "python function"),
            EventMetadataEntry.float(1.2, "float"),
        ],
    )
    yield Output(None)
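The two snippets above omit the solid decorator and pipeline wiring. Below is a minimal, hedged sketch of how such a materializing solid could be defined and executed with the legacy Dagster API; the pipeline name demo_pipeline is illustrative, not taken from the source.

from dagster import AssetMaterialization, EventMetadataEntry, Output, execute_pipeline, pipeline, solid


@solid
def materialize(_):
    # Emit an asset materialization with a couple of metadata entries, then an output.
    yield AssetMaterialization(
        asset_key="all_types",
        description="a materialization with all metadata types",
        metadata_entries=[
            EventMetadataEntry.text("text is cool", "text"),
            EventMetadataEntry.float(1.2, "float"),
        ],
    )
    yield Output(None)


@pipeline
def demo_pipeline():
    materialize()


if __name__ == "__main__":
    assert execute_pipeline(demo_pipeline).success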
Example #3
    def _handle_pointer_output(self, context: OutputContext, parquet_pointer: ParquetPointer):

        yield EventMetadataEntry.path(parquet_pointer.path, "Source Parquet Path")
        with connect_snowflake(config=context.resource_config) as con:
            # stage the data stored at the given path
            con.execute(
                f"""
            CREATE TEMPORARY STAGE tmp_s3_stage
                URL = '{parquet_pointer.path}'
                FILE_FORMAT=(TYPE=PARQUET COMPRESSION=SNAPPY)
                CREDENTIALS=(
                    AWS_KEY_ID='{os.getenv("AWS_ACCESS_KEY_ID")}',
                    AWS_SECRET_KEY='{os.getenv("AWS_SECRET_ACCESS_KEY")}'
                );
            """
            )
            con.execute(self._get_create_table_statement(context, parquet_pointer))
            con.execute(self._get_cleanup_statement(context))
            con.execute(self._get_copy_statement(context, parquet_pointer))
Example #4
def dbt_cli_snapshot_freshness(context) -> Dict:
    """This solid executes ``dbt source snapshot-freshness`` via the dbt CLI."""
    cli_output = execute_cli(
        context.solid_config["dbt_executable"],
        command=("source", "snapshot-freshness"),
        flags_dict=passthrough_flags_only(context.solid_config, ("select", "output", "threads")),
        log=context.log,
        warn_error=context.solid_config["warn-error"],
        ignore_handled_error=context.solid_config["ignore_handled_error"],
    )

    if context.solid_config["yield_materializations"]:
        yield AssetMaterialization(
            asset_key="dbt_source_snapshot-freshness_cli_output",
            description="Output from the CLI execution of `dbt source snapshot-freshness`.",
            metadata_entries=[EventMetadataEntry.json(cli_output, label="CLI Output")],
        )

    yield Output(cli_output)
Example #5
def dbt_cli_run_operation(context) -> Dict:
    """This solid executes ``dbt run-operation`` via the dbt CLI."""
    cli_output = execute_cli(
        context.solid_config["dbt_executable"],
        command=("run-operation", context.solid_config["macro"]),
        flags_dict=passthrough_flags_only(context.solid_config, ("args",)),
        log=context.log,
        warn_error=context.solid_config["warn-error"],
        ignore_handled_error=context.solid_config["ignore_handled_error"],
    )

    if context.solid_config["yield_materializations"]:
        yield AssetMaterialization(
            asset_key="dbt_run_operation_cli_output",
            description="Output from the CLI execution of `dbt run-operation`.",
            metadata_entries=[EventMetadataEntry.json(cli_output, label="CLI Output")],
        )

    yield Output(cli_output)
Example #6
def _act_on_config(solid_config):
    if solid_config["crash_in_solid"]:
        segfault()
    if solid_config["throw_in_solid"]:
        try:
            raise ExampleException("sample cause exception")
        except ExampleException as e:
            raise Failure(
                description="I'm a Failure",
                metadata_entries=[
                    EventMetadataEntry.text(
                        label="metadata_label",
                        text="I am metadata text",
                        description="metadata_description",
                    )
                ],
            ) from e
    elif solid_config["request_retry"]:
        raise RetryRequested()
Example #7
def test_explicit_failure():
    @lambda_solid
    def throws_failure():
        raise Failure(
            description='Always fails.',
            metadata_entries=[EventMetadataEntry.text('why', label='always_fails')],
        )

    @pipeline
    def pipe():
        throws_failure()

    with pytest.raises(DagsterExecutionStepExecutionError) as exc_info:
        execute_pipeline(pipe)

    assert exc_info.value.user_specified_failure.description == 'Always fails.'
    assert exc_info.value.user_specified_failure.metadata_entries == [
        EventMetadataEntry.text('why', label='always_fails')
    ]
Example #8
def less_simple_data_frame_output_materialization_config(
    context, config, value
):
    csv_path = os.path.abspath(config['csv']['path'])
    with open(csv_path, 'w') as fd:
        fieldnames = list(value[0].keys())
        writer = csv.DictWriter(
            fd, fieldnames, delimiter=config['csv']['sep']
        )
        writer.writeheader()
        writer.writerows(value)
    context.log.debug(
        'Wrote dataframe as .csv to {path}'.format(path=csv_path)
    )
    return Materialization(
        'data_frame_csv',
        'LessSimpleDataFrame materialized as csv',
        [EventMetadataEntry.path(csv_path, 'data_frame_csv_path')],
    )
Example #9
 def ge_validation_solid(context, dataset):
     data_context = context.resources.ge_data_context
     if validation_operator_name is not None:
         validation_operator = validation_operator_name
     else:
         data_context.add_validation_operator(
             "ephemeral_validation",
             {
                 "class_name": "ActionListValidationOperator",
                 "action_list": []
             },
         )
         validation_operator = "ephemeral_validation"
     suite = data_context.get_expectation_suite(suite_name)
     final_batch_kwargs = batch_kwargs or {"dataset": dataset}
     if "datasource" in batch_kwargs:
         context.log.warning(
             "`datasource` field of `batch_kwargs` will be ignored; use the `datasource_name` "
             "parameter of the solid factory instead.")
     final_batch_kwargs["datasource"] = datasource_name
     batch = data_context.get_batch(final_batch_kwargs, suite)
     run_id = {
         "run_name": datasource_name + " run",
         "run_time": datetime.datetime.utcnow(),
     }
     results = data_context.run_validation_operator(
         validation_operator, assets_to_validate=[batch], run_id=run_id)
     res = convert_to_json_serializable(
         results.list_validation_results())[0]
     md_str = render_multiple_validation_result_pages_markdown(
         validation_operator_result=results,
         run_info_at_end=True,
     )
     meta_stats = EventMetadataEntry.md(md_str=md_str,
                                        label="Expectation Results")
     yield ExpectationResult(
         success=res["success"],
         metadata_entries=[
             meta_stats,
         ],
     )
     yield Output(res)
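In the snippet above (and in Examples #13 and #18 below), names such as suite_name, datasource_name, batch_kwargs, and validation_operator_name are free variables: the solid is defined inside a factory function that binds them. The following is a hedged sketch of that enclosing pattern; the factory name and signature are illustrative assumptions, not the exact dagster-ge API.

from dagster import InputDefinition, OutputDefinition, solid


def make_ge_validation_solid(name, datasource_name, suite_name,
                             validation_operator_name=None, batch_kwargs=None):
    # Illustrative factory: the returned solid closes over the arguments above,
    # which is why they appear as free variables in the example body.
    @solid(
        name=name,
        input_defs=[InputDefinition("dataset")],
        output_defs=[OutputDefinition(dict)],
        required_resource_keys={"ge_data_context"},
    )
    def ge_validation_solid(context, dataset):
        ...  # body as in the example above

    return ge_validation_solid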
Example #10
def compare_calories(context, cereals, least_hot, least_cold):
    cereals_df = pd.DataFrame(cereals)

    def get_calories(name):
        return cereals_df[cereals_df["name"] == name]["calories"].iloc[0]

    cereal_choice = (
        least_hot if get_calories(least_hot) < get_calories(least_cold) else least_cold
    )
    context.log.info(
        f"Compare the calories of hot and cold cereals: {cereal_choice} is healthier"
    )
    yield AssetMaterialization(
        asset_key="cereal_choice",
        description="Which cereal is healthiest",
        metadata_entries=[
            EventMetadataEntry.text(cereal_choice, "Cereal Choice")
        ],
    )
    yield Output(cereal_choice)
Example #11
def upload_pickled_object_to_gcs_bucket(context, value: Any, bucket_name: str, file_name: str):
    gcs_bucket = context.resources.gcs_client.get_bucket(bucket_name)
    key = '{}-{}'.format(file_name, uuid.uuid4())
    with tempfile.TemporaryFile('w+b') as fp:
        pickle.dump(value, fp, PICKLE_PROTOCOL)
        # The tempfile is deleted when the context manager exits, so the upload must
        # happen inside it; rewind to the start before reading the pickled bytes back.
        fp.seek(0)
        gcs_bucket.blob(key).upload_from_file(fp)

    yield Materialization(
        description='Serialized object to Google Cloud Storage Bucket',
        label='GCS Blob',
        metadata_entries=[
            EventMetadataEntry.text(
                'gs://{bucket_name}/{key}'.format(bucket_name=bucket_name, key=key),
                'google cloud storage URI',
            ),
        ],
    )
    yield Output(value)
Example #12
    def _dagster_type_check(_, value):
        if not isinstance(value, pd.DataFrame):
            return TypeCheck(
                success=False,
                description='Must be a pandas.DataFrame. Got value of type {type_name}.'.format(
                    type_name=type(value).__name__
                ),
            )
        individual_result_dict = {}
        if columns_validator is not None:
            individual_result_dict["columns"] = columns_validator.validate(
                value)

        if columns_aggregate_validator is not None:
            individual_result_dict[
                "column aggregates"] = columns_aggregate_validator.validate(
                    value)

        if dataframe_validator is not None:
            individual_result_dict["dataframe"] = dataframe_validator.validate(
                value)

        typechecks_succeeded = True
        metadata = []
        overall_description = ""
        for key, result in individual_result_dict.items():
            result_val = result.success
            if result_val:
                continue
            typechecks_succeeded = typechecks_succeeded and result_val
            result_dict = result.metadata_entries[0].entry_data.data
            metadata.append(
                EventMetadataEntry.json(
                    result_dict,
                    '{}-constraint-metadata'.format(key),
                ))
            overall_description += "{} failing constraints, requiring {}".format(
                key, result.description)
        return TypeCheck(success=typechecks_succeeded,
                         description=overall_description,
                         metadata_entries=metadata)
Example #13
 def ge_validation_solid(context, pandas_df):
     data_context = context.resources.ge_data_context
     suite = data_context.get_expectation_suite(suite_name)
     batch_kwargs = {
         "dataset": pandas_df,
         "datasource": datasource_name,
     }
     batch = data_context.get_batch(batch_kwargs, suite)
     run_id = {
         "run_name": datasource_name + " run",
         "run_time": datetime.datetime.utcnow(),
     }
     results = data_context.run_validation_operator(
         "action_list_operator", assets_to_validate=[batch], run_id=run_id
     )
     res = convert_to_json_serializable(results.list_validation_results())[0]
     nmeta = EventMetadataEntry.json(
         {'overall': res['statistics'], 'individual': res['results']}, 'constraint-metadata',
     )
     yield ExpectationResult(success=res["success"], metadata_entries=[nmeta])
     yield Output(res)
Example #14
def test_explicit_failure():
    @lambda_solid
    def throws_failure():
        raise DagsterTypeCheckDidNotPass(
            description='Always fails.',
            metadata_entries=[
                EventMetadataEntry.text('why', label='always_fails')
            ],
        )

    @pipeline
    def pipe():
        throws_failure()

    with pytest.raises(DagsterTypeCheckDidNotPass) as exc_info:
        execute_pipeline(pipe)

    assert exc_info.value.description == 'Always fails.'
    assert exc_info.value.metadata_entries == [
        EventMetadataEntry.text('why', label='always_fails')
    ]
Example #15
def dbt_cli_snapshot(context) -> Dict:
    """This solid executes ``dbt snapshot`` via the dbt CLI."""
    cli_output = execute_cli(
        context.solid_config["dbt_executable"],
        command=("snapshot", ),
        flags_dict=passthrough_flags_only(context.solid_config,
                                          ("threads", "models", "exclude")),
        log=context.log,
        warn_error=context.solid_config["warn-error"],
        ignore_handled_error=context.solid_config["ignore_handled_error"],
    )

    yield AssetMaterialization(
        asset_key="dbt_snapshot_cli_output",
        description="Output from the CLI execution of `dbt snapshot`.",
        metadata_entries=[
            EventMetadataEntry.json(cli_output, label="CLI Output")
        ],
    )

    yield Output(cli_output)
Example #16
def upload_pickled_object_to_gcs_bucket(context, value: Any, bucket_name: str,
                                        file_name: str):
    gcs_bucket = context.resources.gcs_client.get_bucket(bucket_name)
    key = "{}-{}".format(file_name, uuid.uuid4())
    with tempfile.TemporaryFile("w+b") as fp:
        pickle.dump(value, fp, PICKLE_PROTOCOL)
        # The tempfile is deleted when the context manager exits, so the upload must
        # happen inside it; rewind to the start before reading the pickled bytes back.
        fp.seek(0)
        gcs_bucket.blob(key).upload_from_file(fp)

    gcs_url = "gs://{bucket_name}/{key}".format(bucket_name=bucket_name,
                                                key=key)

    yield AssetMaterialization(
        asset_key=gcs_url,
        description="Serialized object to Google Cloud Storage Bucket",
        metadata_entries=[
            EventMetadataEntry.text(gcs_url, "google cloud storage URI"),
        ],
    )
    yield Output(value)
Example #17
    def _dagster_type_check(_, value):
        if not isinstance(value, pd.DataFrame):
            return TypeCheck(
                success=False,
                description='Must be a pandas.DataFrame. Got value of type {type_name}.'.format(
                    type_name=type(value).__name__
                ),
            )
        individual_result_dict = {}

        if dataframe_validator is not None:
            individual_result_dict["dataframe"] = dataframe_validator.validate(value)
        if columns_validator is not None:
            individual_result_dict["columns"] = columns_validator.validate(value)

        if columns_aggregate_validator is not None:
            individual_result_dict["column-aggregates"] = columns_aggregate_validator.validate(
                value
            )

        typechecks_succeeded = True
        metadata = []
        overall_description = "Failed Constraints: {}"
        constraint_clauses = []
        for key, result in individual_result_dict.items():
            result_val = result.success
            if result_val:
                continue
            typechecks_succeeded = typechecks_succeeded and result_val
            result_dict = result.metadata_entries[0].entry_data.data
            metadata.append(
                EventMetadataEntry.json(result_dict, '{}-constraint-metadata'.format(key),)
            )
            constraint_clauses.append("{} failing constraints, {}".format(key, result.description))
        # returns aggregates, then column, then dataframe
        return TypeCheck(
            success=typechecks_succeeded,
            description=overall_description.format(constraint_clauses),
            metadata_entries=sorted(metadata, key=lambda x: x.label),
        )
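_dagster_type_check above is a nested function: the enclosing factory passes it as the type_check_fn of a DagsterType, the same mechanism shown later in Example #20. A minimal sketch of that wiring follows; the simplified check and the type name SimpleDataFrame are illustrative assumptions.

import pandas as pd

from dagster import DagsterType, TypeCheck


def _simple_dataframe_type_check(_, value):
    # Simplified stand-in for the validator-driven check above.
    if not isinstance(value, pd.DataFrame):
        return TypeCheck(success=False, description="Must be a pandas.DataFrame.")
    return TypeCheck(success=True)


# Illustrative name; the real factory derives the name and validators from its arguments.
SimpleDataFrame = DagsterType(
    name="SimpleDataFrame",
    type_check_fn=_simple_dataframe_type_check,
)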
Example #18
 def ge_validation_solid(context, pandas_df):
     data_context = context.resources.ge_data_context
     if validation_operator_name is not None:
         validation_operator = validation_operator_name
     else:
         data_context.add_validation_operator(
             "ephemeral_validation",
             {
                 "class_name": "ActionListValidationOperator",
                 "action_list": []
             },
         )
         validation_operator = "ephemeral_validation"
     suite = data_context.get_expectation_suite(suite_name)
     batch_kwargs = {
         "dataset": pandas_df,
         "datasource": datasource_name,
     }
     batch = data_context.get_batch(batch_kwargs, suite)
     run_id = {
         "run_name": datasource_name + " run",
         "run_time": datetime.datetime.utcnow(),
     }
     results = data_context.run_validation_operator(
         validation_operator, assets_to_validate=[batch], run_id=run_id)
     res = convert_to_json_serializable(
         results.list_validation_results())[0]
     md_str = render_multiple_validation_result_pages_markdown(
         validation_operator_result=results,
         run_info_at_end=True,
     )
     meta_stats = EventMetadataEntry.md(md_str=md_str,
                                        label="Expectation Results")
     yield ExpectationResult(
         success=res["success"],
         metadata_entries=[
             meta_stats,
         ],
     )
     yield Output(res)
Example #19
def dbt_cli_test(context) -> DbtCliOutput:
    """This solid executes ``dbt test`` via the dbt CLI."""
    cli_output = execute_cli(
        context.solid_config["dbt_executable"],
        command=("test",),
        flags_dict=passthrough_flags_only(
            context.solid_config, ("data", "schema", "fail-fast", "threads", "models", "exclude")
        ),
        log=context.log,
        warn_error=context.solid_config["warn-error"],
        ignore_handled_error=context.solid_config["ignore_handled_error"],
    )
    run_results = parse_run_results(context.solid_config["project-dir"])
    cli_output = {**run_results, **cli_output}

    yield AssetMaterialization(
        asset_key="dbt_test_cli_output",
        description="Output from the CLI execution of `dbt test`.",
        metadata_entries=[EventMetadataEntry.json(cli_output, label="CLI Output")],
    )

    yield Output(DbtCliOutput.from_dict(cli_output))
Example #20
def test_raise_on_error_true_type_check_returns_successful_type_check():
    TruthyExceptionType = DagsterType(
        name="TruthyExceptionType",
        type_check_fn=lambda _, _val: TypeCheck(
            success=True, metadata_entries=[EventMetadataEntry.text("foo", "bar", "baz")]
        ),
    )

    @solid(output_defs=[OutputDefinition(TruthyExceptionType)])
    def foo_solid(_):
        return 1

    @pipeline
    def foo_pipeline():
        foo_solid()

    pipeline_result = execute_pipeline(foo_pipeline)
    assert pipeline_result.success
    for event in pipeline_result.step_event_list:
        if event.event_type_value == DagsterEventType.STEP_OUTPUT.value:
            assert event.event_specific_data.type_check_data
            assert event.event_specific_data.type_check_data.metadata_entries[0].label == "bar"
            assert (
                event.event_specific_data.type_check_data.metadata_entries[0].entry_data.text
                == "foo"
            )
            assert (
                event.event_specific_data.type_check_data.metadata_entries[0].description == "baz"
            )

    pipeline_result = execute_pipeline(foo_pipeline, raise_on_error=False)
    assert pipeline_result.success
    assert set(
        [
            DagsterEventType.STEP_START.value,
            DagsterEventType.STEP_OUTPUT.value,
            DagsterEventType.STEP_SUCCESS.value,
        ]
    ).issubset([event.event_type_value for event in pipeline_result.step_event_list])
Example #21
def result_to_materialization(
    result: Dict[str, Any], asset_key_prefix: List[str] = None
) -> Optional[AssetMaterialization]:
    """
    This is a hacky solution that attempts to consolidate parsing many of the potential formats
    that dbt can provide its results in. This is known to work for CLI Outputs for dbt versions 0.18+,
    as well as RPC responses for a similar time period, but as the RPC response schema is not documented
    nor enforced, this can become out of date easily.
    """

    asset_key_prefix = check.opt_list_param(asset_key_prefix, "asset_key_prefix", of_type=str)

    # status comes from set of fields rather than "status"
    if "fail" in result:
        success = not result.get("fail") and not result.get("skip") and not result.get("error")
    else:
        success = result["status"] == "success"

    if not success:
        return None

    # all versions represent timing the same way
    metadata = [
        EventMetadataEntry.float(value=result["execution_time"], label="Execution Time (seconds)")
    ] + _timing_to_metadata(result["timing"])

    # working with a response that contains the node block (RPC and CLI 0.18.x)
    if "node" in result:

        unique_id = result["node"]["unique_id"]
        metadata += _node_result_to_metadata(result["node"])
    else:
        unique_id = result["unique_id"]

    return AssetMaterialization(
        description=f"dbt node: {unique_id}",
        metadata_entries=metadata,
        asset_key=asset_key_prefix + unique_id.split("."),
    )
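A hedged usage sketch of the helper above: iterate over the "results" list of a parsed dbt run and yield whatever materializations can be produced. The wrapper name and the exact shape of run_results are assumptions for illustration.

def yield_dbt_materializations(run_results, asset_key_prefix=None):
    # run_results is assumed to be a parsed dbt result document with a "results" list.
    for result in run_results.get("results", []):
        materialization = result_to_materialization(result, asset_key_prefix)
        if materialization is not None:
            yield materialization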
Example #22
def file_handle_to_s3(context, file_handle):
    bucket = context.solid_config['Bucket']
    key = context.solid_config['Key']

    # the s3 put_object API expects the actual bytes to be on the 'Body' key in kwargs; since we
    # get all other fields from config, we copy the config object and add 'Body' here.
    cfg = context.solid_config.copy()
    with context.file_manager.read(file_handle, 'rb') as file_obj:
        cfg['Body'] = file_obj

        context.resources.s3.put_object(**cfg)
        s3_file_handle = S3FileHandle(bucket, key)

        yield Materialization(
            label='file_to_s3',
            metadata_entries=[
                EventMetadataEntry.path(s3_file_handle.s3_path,
                                        label=last_key(key))
            ],
        )

        yield Output(value=s3_file_handle, output_name='s3_file_handle')
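last_key above is referenced but not defined in the snippet; a plausible definition (an assumption, not taken from the source) simply returns the final component of the S3 key.

def last_key(key: str) -> str:
    # Assumed helper: return the last path component of an S3 key.
    if "/" not in key:
        return key
    return key.split("/")[-1]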
Example #23
def many_table_materializations(_context):
    with open(file_relative_path(__file__, MARKDOWN_EXAMPLE), "r") as f:
        md_str = f.read()
        for table in raw_tables:
            yield AssetMaterialization(
                asset_key="table_info",
                metadata_entries=[
                    EventMetadataEntry.text(text=table, label="table_name"),
                    EventMetadataEntry.fspath(path="/path/to/{}".format(table),
                                              label="table_path"),
                    EventMetadataEntry.json(data={"name": table},
                                            label="table_data"),
                    EventMetadataEntry.url(
                        url="https://bigty.pe/{}".format(table),
                        label="table_name_big"),
                    EventMetadataEntry.md(md_str=md_str, label="table_blurb"),
                    EventMetadataEntry.int(29119888133298982934829348,
                                           label="big_int"),
                    EventMetadataEntry.float(float("nan"), label="float_nan"),
                ],
            )
Example #24
def sort_by_calories(context, cereals):
    sorted_cereals = sorted(cereals,
                            key=lambda cereal: int(cereal['calories']))
    context.log.info('Least caloric cereal: {least_caloric}'.format(
        least_caloric=sorted_cereals[0]['name']))
    context.log.info('Most caloric cereal: {most_caloric}'.format(
        most_caloric=sorted_cereals[-1]['name']))
    fieldnames = list(sorted_cereals[0].keys())
    sorted_cereals_csv_path = os.path.abspath(
        'output/calories_sorted_{run_id}.csv'.format(run_id=context.run_id))
    with open(sorted_cereals_csv_path, 'w') as fd:
        writer = csv.DictWriter(fd, fieldnames)
        writer.writeheader()
        writer.writerows(sorted_cereals)
    yield Materialization(
        label='sorted_cereals_csv',
        description='Cereals data frame sorted by caloric content',
        metadata_entries=[
            EventMetadataEntry.path(sorted_cereals_csv_path,
                                    'sorted_cereals_csv_path')
        ],
    )
    yield Output(None)
Example #25
def sort_by_calories(context, cereals):
    sorted_cereals = sorted(cereals,
                            key=lambda cereal: int(cereal["calories"]))
    context.log.info("Least caloric cereal: {least_caloric}".format(
        least_caloric=sorted_cereals[0]["name"]))
    context.log.info("Most caloric cereal: {most_caloric}".format(
        most_caloric=sorted_cereals[-1]["name"]))
    fieldnames = list(sorted_cereals[0].keys())
    sorted_cereals_csv_path = os.path.abspath(
        "output/calories_sorted_{run_id}.csv".format(run_id=context.run_id))
    os.makedirs(os.path.dirname(sorted_cereals_csv_path), exist_ok=True)
    with open(sorted_cereals_csv_path, "w") as fd:
        writer = csv.DictWriter(fd, fieldnames)
        writer.writeheader()
        writer.writerows(sorted_cereals)
    yield Materialization(
        label="sorted_cereals_csv",
        description="Cereals data frame sorted by caloric content",
        metadata_entries=[
            EventMetadataEntry.path(sorted_cereals_csv_path,
                                    "sorted_cereals_csv_path")
        ],
    )
    yield Output(None)
Example #26
def many_materializations_and_passing_expectations(_context):
    tables = [
        'users',
        'groups',
        'events',
        'friends',
        'pages',
        'fans',
        'event_admins',
        'group_admins',
    ]

    for table in tables:
        yield Materialization(
            label='table_info',
            metadata_entries=[
                EventMetadataEntry.path(label='table_path', path='/path/to/{}.raw'.format(table))
            ],
        )
        yield ExpectationResult(
            success=True,
            label='{table}.row_count'.format(table=table),
            description='Row count passed for {table}'.format(table=table),
        )
Example #27
    def made_solid(context):
        partition_date = datetime.strptime(context.solid_config["partition"], DEFAULT_DATE_FORMAT)
        if data_size_fn:
            data_size = data_size_fn(partition_date)
            sleep_time = sleep_factor * data_size

            time.sleep(sleep_time)

        if error_rate and random() < error_rate:
            raise Exception("blah")

        if asset_key:
            metadata_entries = materialization_metadata_entries or []
            if data_size_fn:
                metadata_entries.append(EventMetadataEntry.float(data_size, "Data size (bytes)"))

            if len(metadata_entries) == 0:
                metadata_entries = None

            yield AssetMaterialization(
                asset_key=asset_key,
                metadata_entries=metadata_entries,
                partition=context.solid_config.get("partition"),
            )
Example #28
File: util.py  Project: cy56/dagster
def event_metadata_entries(metadata_entry_datas):
    if not metadata_entry_datas:
        return

    for metadata_entry_data in metadata_entry_datas:
        typename = metadata_entry_data['__typename']
        label = metadata_entry_data['label']
        description = metadata_entry_data.get('description')
        if typename == 'EventPathMetadataEntry':
            yield EventMetadataEntry.path(label=label,
                                          description=description,
                                          path=metadata_entry_data['path'])
        elif typename == 'EventJsonMetadataEntry':
            yield EventMetadataEntry.json(
                label=label,
                description=description,
                data=seven.json.loads(metadata_entry_data.get(
                    'jsonString', '')),
            )
        elif typename == 'EventMarkdownMetadataEntry':
            yield EventMetadataEntry.md(label=label,
                                        description=description,
                                        md_str=metadata_entry_data.get(
                                            'md_str', ''))
        elif typename == 'EventTextMetadataEntry':
            yield EventMetadataEntry.text(label=label,
                                          description=description,
                                          text=metadata_entry_data['text'])
        elif typename == 'EventUrlMetadataEntry':
            yield EventMetadataEntry.url(label=label,
                                         description=description,
                                         url=metadata_entry_data['url'])
        elif typename == 'EventPythonArtifactMetadataEntry':
            yield EventMetadataEntry(
                label=label,
                description=description,
                entry_data=PythonArtifactMetadataEntryData(
                    metadata_entry_data['module'],
                    metadata_entry_data['name']),
            )
        else:
            check.not_implemented('TODO for type {}'.format(typename))
Example #29
File: util.py  Project: sd2k/dagster
def event_metadata_entries(metadata_entry_datas):
    if not metadata_entry_datas:
        return

    for metadata_entry_data in metadata_entry_datas:
        typename = metadata_entry_data["__typename"]
        label = metadata_entry_data["label"]
        description = metadata_entry_data.get("description")
        if typename == "EventPathMetadataEntry":
            yield EventMetadataEntry.path(label=label,
                                          description=description,
                                          path=metadata_entry_data["path"])
        elif typename == "EventJsonMetadataEntry":
            yield EventMetadataEntry.json(
                label=label,
                description=description,
                data=seven.json.loads(metadata_entry_data.get(
                    "jsonString", "")),
            )
        elif typename == "EventMarkdownMetadataEntry":
            yield EventMetadataEntry.md(label=label,
                                        description=description,
                                        md_str=metadata_entry_data.get(
                                            "md_str", ""))
        elif typename == "EventTextMetadataEntry":
            yield EventMetadataEntry.text(label=label,
                                          description=description,
                                          text=metadata_entry_data["text"])
        elif typename == "EventUrlMetadataEntry":
            yield EventMetadataEntry.url(label=label,
                                         description=description,
                                         url=metadata_entry_data["url"])
        elif typename == "EventPythonArtifactMetadataEntry":
            yield EventMetadataEntry(
                label=label,
                description=description,
                entry_data=PythonArtifactMetadataEntryData(
                    metadata_entry_data["module"],
                    metadata_entry_data["name"]),
            )
        else:
            check.not_implemented("TODO for type {}".format(typename))
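An illustrative call to event_metadata_entries, using GraphQL-style dictionaries shaped like the branches above; the labels and values are made up for the example.

entries = list(
    event_metadata_entries(
        [
            {"__typename": "EventTextMetadataEntry", "label": "table_name", "text": "users"},
            {"__typename": "EventUrlMetadataEntry", "label": "table_docs", "url": "https://example.com/users"},
        ]
    )
)
assert entries[0].label == "table_name"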
Example #30
def compute_traffic_dataframe_event_metadata(dataframe):
    return [
        EventMetadataEntry.text(str(min(dataframe["peak_traffic_load"])),
                                "min_traffic_load", "Best Peak Load"),
        EventMetadataEntry.text(str(max(dataframe["peak_traffic_load"])),
                                "max_traffic_load", "Worst Peak Load"),
        EventMetadataEntry.text(
            str(mean(dataframe["peak_traffic_load"])),
            "mean_traffic_load",
            "Mean peak traffic",
        ),
        EventMetadataEntry.text(
            str(median(dataframe["peak_traffic_load"])),
            "median_traffic_load",
            "Median peak traffic",
        ),
        EventMetadataEntry.text(str(len(dataframe)), "n_rows",
                                "Number of rows seen in the dataframe"),
        EventMetadataEntry.text(str(dataframe.columns), "columns",
                                "Keys of columns seen in the dataframe"),
    ]
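A hedged sketch of how the metadata list built above might be attached to a materialization inside a solid; the solid name and asset key are illustrative, not from the source.

from dagster import AssetMaterialization, Output, solid


@solid
def summarize_traffic(_, dataframe):
    # Attach the computed metadata entries to an asset materialization, then pass the data along.
    yield AssetMaterialization(
        asset_key="traffic_summary",
        metadata_entries=compute_traffic_dataframe_event_metadata(dataframe),
    )
    yield Output(dataframe)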