def emit_successful_expectation(_context):
    """Yield a single passing ExpectationResult carrying JSON metadata."""
    payload = EventMetadataEntry.json(label="data", data={"reason": "Just because."})
    yield ExpectationResult(
        success=True,
        label="always_true",
        description="Successful",
        metadata_entries=[payload],
    )
def emit_successful_expectation(_context):
    """Yield one always-passing expectation with a JSON metadata entry."""
    yield ExpectationResult(
        success=True,
        label='always_true',
        description='Successful',
        metadata_entries=[
            EventMetadataEntry.json(label='data', data={'reason': 'Just because.'}),
        ],
    )
def emit_events_op(input_num):
    """Increment the input, then emit an expectation, a materialization, and the result.

    Yields, in order: an ExpectationResult checking positivity, an
    AssetMaterialization for the persisted string, and the incremented number
    as the ``a_num`` output.
    """
    incremented = input_num + 1
    yield ExpectationResult(
        success=incremented > 0,
        label="positive",
        description="A num must be positive",
    )
    yield AssetMaterialization(
        asset_key="persisted_string",
        description="Let us pretend we persisted the string somewhere",
    )
    yield Output(value=incremented, output_name="a_num")
def emit_failed_expectation(_context):
    """Yield a single failing ExpectationResult with JSON metadata."""
    reason = EventMetadataEntry.json(label="data", data={"reason": "Relentless pessimism."})
    yield ExpectationResult(
        success=False,
        label="always_false",
        description="Failure",
        metadata_entries=[reason],
    )
def emit_failed_expectation(_context):
    """Yield one always-failing expectation with a JSON metadata entry."""
    yield ExpectationResult(
        success=False,
        label='always_false',
        description='Failure',
        metadata_entries=[
            EventMetadataEntry.json(label='data', data={'reason': 'Relentless pessimism.'}),
        ],
    )
def emit_events_solid(_, input_num):
    """Emit an expectation, a materialization, and two outputs.

    Increments ``input_num``, asserts positivity, records a pretend
    materialization, then yields the number as ``a_num`` and the constant
    string ``'foo'`` as ``a_string``.
    """
    incremented = input_num + 1
    text_value = 'foo'
    yield ExpectationResult(
        success=incremented > 0,
        label='positive',
        description='A num must be positive',
    )
    yield Materialization(
        label='persisted_string',
        description='Let us pretend we persisted the string somewhere',
    )
    yield Output(value=incremented, output_name='a_num')
    yield Output(value=text_value, output_name='a_string')
def combine(_context, num1, num2):
    """Add the two inputs, assert the sum is positive, and yield it.

    The expectation's metadata carries the textual result for display.
    """
    # NOTE: original adds num2 + num1 in that order; preserved in case the
    # inputs are types where `+` is not commutative.
    result = num2 + num1
    yield ExpectationResult(
        success=result > 0,
        description="ensure positive result",
        metadata_entries=[
            EventMetadataEntry.text(
                "{result}".format(result=result), label="combine result"
            ),
        ],
    )
    yield Output(result)
def add_two_nums(
    _context, num1: PositiveNumber, num2: PositiveNumber
):  # mypy compliance only works for naked python type
    """Sum two positive numbers, wrap the sum in PercentType, and yield it.

    Also yields an expectation that the wrapped value exceeds 100.
    """
    total = num1 + num2  # e.g. 2 + 3 => 5
    percent = PercentType(total)
    yield ExpectationResult(
        success=percent.value > 100,
        description="ensure PercentType gets a number greater than 100",
        metadata_entries=[
            EventMetadataEntry.text(
                "{result}".format(result=percent), label="transfer to percent type"
            ),
        ],
    )
    yield Output(percent)
def add_two_nums(
    _context, num1: PositiveNumber, num2: PositiveNumber
) -> PercentDagsterType:  # catches the input type error if not int or not positive
    """Sum two positive numbers, wrap the sum in PercentType, and yield it.

    The return annotation lets Dagster type-check the output; an
    ExpectationResult additionally asserts the wrapped value exceeds 100.
    """
    total = num1 + num2  # e.g. 2 + 3 => 5
    percent = PercentType(total)
    yield ExpectationResult(
        success=percent.value > 100,
        description="ensure PercentType gets a number greater than 100",
        metadata_entries=[
            EventMetadataEntry.text(
                "{result}".format(result=percent), label="transfer to percent type"
            ),
        ],
    )
    yield Output(percent)
def combine(_context, num1, num2):
    """Add the two inputs, assert positivity, and yield the sum.

    Uses EventMetadataEntry.text so the result is visible next to the
    expectation in Dagit (Expectation: success -> green, failure -> yellow).
    """
    # Order preserved from the original (num2 + num1).
    result = num2 + num1
    yield ExpectationResult(
        success=result > 0,
        description="ensure positive result",
        metadata_entries=[
            EventMetadataEntry.text(
                "{result}".format(result=result), label="combine result"
            ),
        ],
    )
    yield Output(result)
def my_metadata_expectation_solid(context, df):
    """Transform the dataframe, assert it is non-empty, and yield it.

    The expectation attaches text, URL, int, and float metadata entries for
    display in Dagit.
    """
    df = do_some_transform(df)
    entries = [
        EventMetadataEntry.text("Text-based metadata for this event", label="text_metadata"),
        EventMetadataEntry.url("http://mycoolsite.com/url_for_my_data", label="dashboard_url"),
        EventMetadataEntry.int(len(df), "row count"),
        EventMetadataEntry.float(calculate_bytes(df), "size (bytes)"),
    ]
    yield ExpectationResult(
        success=len(df) > 0,
        description="ensure dataframe has rows",
        metadata_entries=entries,
    )
    yield Output(df)
def send_to_slack(context, download_data):
    """Validate that transaction and block dates match, then post metrics to Slack.

    ``download_data`` is expected to be a two-element sequence: the first with
    a ``transaction_date`` / ``DASH_transferred`` column, the second with
    ``block_date`` / ``DASH_blocks`` — TODO confirm against the upstream solid.

    Yields an ExpectationResult (dates match), a Materialization with the
    computed metrics, and Output(1).
    """
    transaction_data = download_data[0]
    block_data = download_data[1]
    transaction_date = transaction_data['transaction_date'][0]
    block_date = block_data['block_date'][0]
    yield ExpectationResult(
        label='dates_match',
        success=transaction_date == block_date,
        metadata_entries=[
            EventMetadataEntry.text(str(transaction_date), 'transaction_date'),
            EventMetadataEntry.text(str(block_date), 'block_date'),
        ],
    )
    date = transaction_date
    dash_transferred = transaction_data['DASH_transferred'][0]
    dash_blocks = block_data['DASH_blocks'][0]
    average_dash_transferred_per_block = float(dash_transferred) / dash_blocks
    yield Materialization(
        label='data',
        metadata_entries=[
            EventMetadataEntry.text(
                # BUGFIX: was misspelled 'dash tranferred'
                '{dash_transferred} dash transferred'.format(
                    dash_transferred=dash_transferred),
                'dash_transferred',
            ),
            EventMetadataEntry.text(
                '{dash_blocks} dash blocks'.format(dash_blocks=dash_blocks),
                'dash_blocks'),
        ],
    )
    context.resources.slack.chat.post_message(
        channel='#metrics-testing',
        text='{date}\nDash Transferred: {dash_transferred}\nDash blocks: {dash_blocks}\n'
        'Average dash transferred/block: {average_dash_transferred_per_block}'.format(
            date=date,
            dash_transferred=dash_transferred,
            dash_blocks=dash_blocks,
            average_dash_transferred_per_block=average_dash_transferred_per_block,
        ),
    )
    yield Output(1)
def check_users_and_groups_one_fails_one_succeeds(_context):
    """Yield a passing user expectation and a failing groups expectation.

    Demonstrates the legacy name/message/result_metadata ExpectationResult
    fields with per-column summary statistics.
    """
    user_columns = {
        'columns': {
            'name': {'nulls': 0, 'empty': 0, 'values': 123, 'average_length': 3.394893},
            'time_created': {'nulls': 1, 'empty': 2, 'values': 120, 'average': 1231283},
        }
    }
    yield ExpectationResult(
        success=True,
        name='user_expectations',
        message='Battery of expectations for user',
        result_metadata=user_columns,
    )
    group_columns = {
        'columns': {
            'name': {'nulls': 1, 'empty': 0, 'values': 122, 'average_length': 3.394893},
            'time_created': {'nulls': 1, 'empty': 2, 'values': 120, 'average': 1231283},
        }
    }
    yield ExpectationResult(
        success=False,
        name='groups_expectations',
        message='Battery of expectations for groups',
        result_metadata=group_columns,
    )
def my_metadata_expectation_solid(context, df):
    """Run a transform for its side effects, assert the dataframe has rows, yield it.

    NOTE: the transform's return value is intentionally discarded here — the
    original, untransformed ``df`` is what gets yielded.
    """
    do_some_transform(df)
    yield ExpectationResult(
        success=len(df) > 0,
        description='ensure dataframe has rows',
        metadata_entries=[
            EventMetadataEntry.text(
                'Text-based metadata for this event', label='text_metadata'),
            EventMetadataEntry.url(
                'http://mycoolsite.com/url_for_my_data', label='dashboard_url'),
            # Row count is reported as a float in this variant.
            EventMetadataEntry.float(1.0 * len(df), 'row count'),
            EventMetadataEntry.float(calculate_bytes(df), 'size (bytes)'),
        ],
    )
    yield Output(df)
def try_parse_pass(text):
    """Build a passing dbt-test ExpectationResult from a log line, or None.

    Returns None when ``text`` does not match TEST_PASS_REGEX; otherwise group
    1 of the match is taken as the test name.
    """
    match = TEST_PASS_REGEX.search(text)
    if match is None:
        return None
    test_name = match.group(1)
    return ExpectationResult(
        success=True,
        label='dbt_test',
        description='Dbt test {} passed'.format(test_name),
        metadata_entries=[
            EventMetadataEntry.text(label='dbt_test_name', text=test_name),
        ],
    )
def _ge_validation_fn(context, dataset):
    """Validate ``dataset`` with Great Expectations and yield the results.

    Uses the configured validation operator when ``validation_operator_name``
    is set; otherwise registers a throwaway ActionListValidationOperator.
    Yields an ExpectationResult whose metadata is the rendered markdown report,
    followed by the JSON-serializable result dict as an Output.
    """
    data_context = context.resources.ge_data_context
    if validation_operator_name is not None:
        validation_operator = validation_operator_name
    else:
        # No operator configured — register an ephemeral one with no actions.
        data_context.add_validation_operator(
            "ephemeral_validation",
            {"class_name": "ActionListValidationOperator", "action_list": []},
        )
        validation_operator = "ephemeral_validation"
    suite = data_context.get_expectation_suite(suite_name)
    final_batch_kwargs = batch_kwargs or {"dataset": dataset}
    if "datasource" in batch_kwargs:
        context.log.warning(
            "`datasource` field of `batch_kwargs` will be ignored; use the `datasource_name` "
            f"parameter of the {decorator_name} factory instead.")
    # The datasource always comes from the factory parameter.
    final_batch_kwargs["datasource"] = datasource_name
    batch = data_context.get_batch(final_batch_kwargs, suite)
    run_id = {
        "run_name": datasource_name + " run",
        "run_time": datetime.datetime.utcnow(),
    }
    results = data_context.run_validation_operator(
        validation_operator, assets_to_validate=[batch], run_id=run_id)
    res = convert_to_json_serializable(results.list_validation_results())[0]
    # Render the GE results page to markdown for Dagit display.
    page_renderer = ValidationResultsPageRenderer(run_info_at_end=True)
    rendered_docs = page_renderer.render_validation_operator_result(results)
    md_str = " ".join(DefaultMarkdownPageView().render(rendered_docs))
    meta_stats = MetadataEntry(
        "Expectation Results", value=MetadataValue.md(md_str))
    yield ExpectationResult(
        success=res["success"],
        metadata_entries=[meta_stats],
    )
    yield Output(res)
def my_metadata_expectation_solid(context, df):
    """Transform the dataframe, assert it is non-empty, and yield it.

    Uses the dict-style ``metadata`` API (raw values plus EventMetadata.url).
    """
    df = do_some_transform(df)
    metadata = {
        "text_metadata": "Text-based metadata for this event",
        "dashboard_url": EventMetadata.url("http://mycoolsite.com/url_for_my_data"),
        "raw_count": len(df),
        "size (bytes)": calculate_bytes(df),
    }
    yield ExpectationResult(
        success=len(df) > 0,
        description="ensure dataframe has rows",
        metadata=metadata,
    )
    yield Output(df)
def my_metadata_expectation_op(context, df):
    """Transform the dataframe, log a non-empty expectation, and return it.

    Unlike the yielding variants, this op logs the ExpectationResult through
    ``context.log_event`` and returns the transformed dataframe directly.
    """
    df = do_some_transform(df)
    result = ExpectationResult(
        success=len(df) > 0,
        description="ensure dataframe has rows",
        metadata={
            "text_metadata": "Text-based metadata for this event",
            "dashboard_url": MetadataValue.url("http://mycoolsite.com/url_for_my_data"),
            "raw_count": len(df),
            "size (bytes)": calculate_bytes(df),
        },
    )
    context.log_event(result)
    return df
def try_parse_fail(text):
    """Build a failing dbt-test ExpectationResult from a log line, or None.

    Returns None when ``text`` does not match TEST_FAIL_REGEX; otherwise group
    1 is the failure count and group 2 the test name.
    """
    match = TEST_FAIL_REGEX.search(text)
    if match is None:
        return None
    failure_count = match.group(1)
    test_name = match.group(2)
    return ExpectationResult(
        success=False,
        label='dbt_test',
        description='Dbt test {} failed'.format(test_name),
        metadata_entries=[
            EventMetadataEntry.text(label='dbt_test_name', text=test_name),
            EventMetadataEntry.text(label='failure_count', text=failure_count),
        ],
    )
def many_materializations_and_passing_expectations(_context):
    """Yield a materialization and a passing row-count expectation per table.

    Uses the legacy Materialization(path=...) / ExpectationResult(name=,
    message=) API.
    """
    tables = [
        'users',
        'groups',
        'events',
        'friends',
        'pages',
        'fans',
        'event_admins',
        'group_admins',
    ]
    for table in tables:
        yield Materialization(
            path='/path/to/{}'.format(table), description='This is a table.')
        yield ExpectationResult(
            success=True,
            name='{table}.row_count'.format(table=table),
            message='Row count passed for {table}'.format(table=table),
        )
def ge_validation_solid(context, dataset):
    """Validate ``dataset`` with Great Expectations and yield the results.

    Falls back to an ephemeral ActionListValidationOperator when no operator
    name was supplied. Yields an ExpectationResult whose metadata is the
    rendered markdown report, then the serializable result dict as an Output.
    """
    data_context = context.resources.ge_data_context
    if validation_operator_name is not None:
        validation_operator = validation_operator_name
    else:
        # No operator configured — register an ephemeral one with no actions.
        data_context.add_validation_operator(
            "ephemeral_validation",
            {"class_name": "ActionListValidationOperator", "action_list": []},
        )
        validation_operator = "ephemeral_validation"
    suite = data_context.get_expectation_suite(suite_name)
    final_batch_kwargs = batch_kwargs or {"dataset": dataset}
    if "datasource" in batch_kwargs:
        context.log.warning(
            "`datasource` field of `batch_kwargs` will be ignored; use the `datasource_name` "
            "parameter of the solid factory instead.")
    # The datasource always comes from the factory parameter.
    final_batch_kwargs["datasource"] = datasource_name
    batch = data_context.get_batch(final_batch_kwargs, suite)
    run_id = {
        "run_name": datasource_name + " run",
        "run_time": datetime.datetime.utcnow(),
    }
    results = data_context.run_validation_operator(
        validation_operator, assets_to_validate=[batch], run_id=run_id)
    res = convert_to_json_serializable(results.list_validation_results())[0]
    md_str = render_multiple_validation_result_pages_markdown(
        validation_operator_result=results,
        run_info_at_end=True,
    )
    meta_stats = EventMetadataEntry.md(md_str=md_str, label="Expectation Results")
    yield ExpectationResult(
        success=res["success"],
        metadata_entries=[meta_stats],
    )
    yield Output(res)
def ge_validation_solid(context, pandas_df):
    """Run the configured GE suite against ``pandas_df`` via action_list_operator.

    Yields an ExpectationResult carrying the overall statistics and individual
    results as JSON metadata, followed by the serializable result dict.
    """
    data_context = context.resources.ge_data_context
    suite = data_context.get_expectation_suite(suite_name)
    batch_kwargs = {
        "dataset": pandas_df,
        "datasource": datasource_name,
    }
    batch = data_context.get_batch(batch_kwargs, suite)
    run_id = {
        "run_name": datasource_name + " run",
        "run_time": datetime.datetime.utcnow(),
    }
    results = data_context.run_validation_operator(
        "action_list_operator", assets_to_validate=[batch], run_id=run_id
    )
    res = convert_to_json_serializable(results.list_validation_results())[0]
    constraint_meta = EventMetadataEntry.json(
        {'overall': res['statistics'], 'individual': res['results']},
        'constraint-metadata',
    )
    yield ExpectationResult(success=res["success"], metadata_entries=[constraint_meta])
    yield Output(res)
def ge_validation_solid(context, pandas_df):
    """Validate ``pandas_df`` with Great Expectations and yield the results.

    Falls back to an ephemeral ActionListValidationOperator when no operator
    name was supplied. Note: this variant always sets the datasource from the
    factory's ``datasource_name``.
    """
    data_context = context.resources.ge_data_context
    if validation_operator_name is not None:
        validation_operator = validation_operator_name
    else:
        # No operator configured — register an ephemeral one with no actions.
        data_context.add_validation_operator(
            "ephemeral_validation",
            {"class_name": "ActionListValidationOperator", "action_list": []},
        )
        validation_operator = "ephemeral_validation"
    suite = data_context.get_expectation_suite(suite_name)
    batch_kwargs = {
        "dataset": pandas_df,
        "datasource": datasource_name,
    }
    batch = data_context.get_batch(batch_kwargs, suite)
    run_id = {
        "run_name": datasource_name + " run",
        "run_time": datetime.datetime.utcnow(),
    }
    results = data_context.run_validation_operator(
        validation_operator, assets_to_validate=[batch], run_id=run_id)
    res = convert_to_json_serializable(results.list_validation_results())[0]
    md_str = render_multiple_validation_result_pages_markdown(
        validation_operator_result=results,
        run_info_at_end=True,
    )
    meta_stats = EventMetadataEntry.md(md_str=md_str, label="Expectation Results")
    yield ExpectationResult(
        success=res["success"],
        metadata_entries=[meta_stats],
    )
    yield Output(res)
def many_materializations_and_passing_expectations(_context):
    """Yield a path materialization and a passing row-count expectation per table."""
    tables = [
        'users',
        'groups',
        'events',
        'friends',
        'pages',
        'fans',
        'event_admins',
        'group_admins',
    ]
    for table in tables:
        path_entry = EventMetadataEntry.path(
            label='table_path', path='/path/to/{}.raw'.format(table))
        yield Materialization(
            label='table_info',
            metadata_entries=[path_entry],
        )
        yield ExpectationResult(
            success=True,
            label='{table}.row_count'.format(table=table),
            description='Row count passed for {table}'.format(table=table),
        )
def many_materializations_and_passing_expectations(_context):
    """Yield an AssetMaterialization and a passing row-count expectation per table."""
    tables = [
        "users",
        "groups",
        "events",
        "friends",
        "pages",
        "fans",
        "event_admins",
        "group_admins",
    ]
    for table in tables:
        yield AssetMaterialization(
            asset_key="table_info",
            metadata={"table_path": EventMetadata.path(f"/path/to/{table}.raw")},
        )
        yield ExpectationResult(
            success=True,
            label="{table}.row_count".format(table=table),
            description="Row count passed for {table}".format(table=table),
        )
def no_output(_):
    """Yield a single passing expectation and no Output events."""
    yield ExpectationResult(True)
def emit_successful_expectation_no_metadata(_context):
    """Yield a passing expectation that carries no metadata entries."""
    yield ExpectationResult(
        success=True,
        label="no_metadata",
        description="Successful",
    )
def df_expectations_solid(_context, sum_df):
    """Yield two passing expectations, then pass the dataframe through unchanged."""
    for expectation_label in ("some_expectation", "other_expectation"):
        yield ExpectationResult(label=expectation_label, success=True)
    yield Output(sum_df)
def my_expectation_solid(context, df):
    """Run a transform for its side effects, assert non-empty, and yield ``df``.

    NOTE(review): the transform's return value is discarded, so the original
    ``df`` is what gets validated and yielded — confirm this is intended.
    """
    do_some_transform(df)
    yield ExpectationResult(
        success=len(df) > 0,
        description="ensure dataframe has rows",
    )
    yield Output(df)
def outer_exp(_c, _v):
    """Record that this hook ran in the shared ``called`` dict, then pass."""
    called['outer_exp'] = True
    return ExpectationResult(True)