def in_process(pid, step_keys_to_execute=None):
    '''Build EngineEventData for an in-process engine start, recording the pid
    and, when provided, the step keys scheduled for execution.'''
    check.int_param(pid, 'pid')
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute')

    entries = [EventMetadataEntry.text(str(pid), 'pid')]
    if step_keys_to_execute:
        entries.append(EventMetadataEntry.text(str(step_keys_to_execute), 'step_keys'))
    return EngineEventData(metadata_entries=entries)
def multiprocess(pid, step_keys_to_execute=None):
    """Build EngineEventData for a multiprocess engine start, recording the pid
    and, when provided, the step keys scheduled for execution."""
    check.int_param(pid, "pid")
    check.opt_list_param(step_keys_to_execute, "step_keys_to_execute")

    metadata = [EventMetadataEntry.text(str(pid), "pid")]
    if step_keys_to_execute:
        metadata.append(EventMetadataEntry.text(str(step_keys_to_execute), "step_keys"))
    return EngineEventData(metadata_entries=metadata)
def multiprocess(pid, parent_pid=None, step_keys_to_execute=None):
    '''Build EngineEventData for a multiprocess engine worker.

    Args:
        pid (int): pid of the worker process.
        parent_pid (Optional[int]): pid of the parent process, if known.
        step_keys_to_execute (Optional[set]): step keys this worker will execute.
    '''
    check.int_param(pid, 'pid')
    check.opt_int_param(parent_pid, 'parent_pid')
    check.opt_set_param(step_keys_to_execute, 'step_keys_to_execute')

    metadata_entries = [EventMetadataEntry.text(str(pid), 'pid')]
    # Explicit None check: opt_int_param admits 0, and a truthiness test would
    # silently drop a falsy-but-present parent pid.
    if parent_pid is not None:
        metadata_entries.append(EventMetadataEntry.text(str(parent_pid), 'parent_pid'))
    if step_keys_to_execute:
        metadata_entries.append(
            EventMetadataEntry.text(str(step_keys_to_execute), 'step_keys')
        )
    return EngineEventData(metadata_entries=metadata_entries)
def resource_init_success(
    pipeline_name, execution_plan, log_manager, resource_instances, resource_init_times
):
    """Emit a DagsterEvent marking that every resource finished initializing,
    attaching one python_artifact metadata entry per initialized resource."""
    from dagster.core.execution.plan.plan import ExecutionPlan

    metadata_entries = [
        EventMetadataEntry.python_artifact(
            resource_instances[resource_key].__class__,
            resource_key,
            "Initialized in {}".format(resource_init_times[resource_key]),
        )
        for resource_key in resource_instances
    ]
    return DagsterEvent.from_resource(
        pipeline_name=check.str_param(pipeline_name, "pipeline_name"),
        execution_plan=check.inst_param(execution_plan, "execution_plan", ExecutionPlan),
        log_manager=check.inst_param(log_manager, "log_manager", DagsterLogManager),
        # Keys are sorted so the message is deterministic across runs.
        message="Finished initialization of resources [{}].".format(
            ", ".join(sorted(resource_init_times.keys()))
        ),
        event_specific_data=EngineEventData(
            metadata_entries=metadata_entries, marker_end="resources",
        ),
    )
def resource_init_success(execution_plan, log_manager, resource_instances, resource_init_times):
    '''Emit a DagsterEvent marking that every resource finished initializing,
    attaching one python_artifact metadata entry per initialized resource.

    Args:
        execution_plan (ExecutionPlan): plan whose resources were initialized.
        log_manager (DagsterLogManager): log manager used to emit the event.
        resource_instances (dict): resource key -> initialized resource object.
        resource_init_times (dict): resource key -> human-readable init duration.
    '''
    from dagster.core.execution.plan.plan import ExecutionPlan

    metadata_entries = []
    for resource_key, resource_obj in resource_instances.items():
        metadata_entries.append(
            EventMetadataEntry.python_artifact(
                resource_obj.__class__,
                resource_key,
                'Initialized in {}'.format(resource_init_times[resource_key]),
            )
        )
    return DagsterEvent.from_resource(
        execution_plan=check.inst_param(execution_plan, 'execution_plan', ExecutionPlan),
        log_manager=check.inst_param(log_manager, 'log_manager', DagsterLogManager),
        # Sort keys so the message is deterministic regardless of dict
        # iteration order, matching the other resource_init_success variant.
        message='Finished initialization of resources [{}].'.format(
            ', '.join(sorted(resource_init_times.keys()))),
        event_specific_data=EngineEventData(
            metadata_entries=metadata_entries, marker_end='resources',
        ),
    )
def object_store_operation(step_context, object_store_operation_result):
    '''Emit an OBJECT_STORE_OPERATION DagsterEvent describing a set/get/copy
    performed against an object store, with a human-readable message.'''
    result = object_store_operation_result

    object_store_name = (
        '{object_store_name} '.format(object_store_name=result.object_store_name)
        if result.object_store_name
        else ''
    )
    serialization_strategy_modifier = (
        ' using {serialization_strategy_name}'.format(
            serialization_strategy_name=result.serialization_strategy_name
        )
        if result.serialization_strategy_name
        else ''
    )
    value_name = result.value_name

    # Resolve the op enum once instead of per-branch.
    op = ObjectStoreOperationType(result.op)
    if op == ObjectStoreOperationType.SET_OBJECT:
        message = (
            'Stored intermediate object for output {value_name} in '
            '{object_store_name}object store{serialization_strategy_modifier}.'
        ).format(
            value_name=value_name,
            object_store_name=object_store_name,
            serialization_strategy_modifier=serialization_strategy_modifier,
        )
    elif op == ObjectStoreOperationType.GET_OBJECT:
        message = (
            'Retrieved intermediate object for input {value_name} in '
            '{object_store_name}object store{serialization_strategy_modifier}.'
        ).format(
            value_name=value_name,
            object_store_name=object_store_name,
            serialization_strategy_modifier=serialization_strategy_modifier,
        )
    elif op == ObjectStoreOperationType.CP_OBJECT:
        message = (
            'Copied intermediate object for input {value_name} from {key} to {dest_key}'
        ).format(
            value_name=value_name,
            key=result.key,
            dest_key=result.dest_key,
        )
    else:
        message = ''

    return DagsterEvent.from_step(
        DagsterEventType.OBJECT_STORE_OPERATION,
        step_context,
        event_specific_data=ObjectStoreOperationResultData(
            op=result.op,
            value_name=value_name,
            metadata_entries=[
                EventMetadataEntry.path(result.key, label='key')
            ],
        ),
        message=message,
    )
def _do_expectation(_info, df):
    '''Run the captured great_expectations callback against df and wrap the
    raw ge result dict in an ExpectationResult.'''
    result = ge_callback(ge.from_pandas(df))
    check.invariant('success' in result)
    return ExpectationResult(
        success=result['success'],
        metadata_entries=[EventMetadataEntry.json(label='result', data=result)],
    )
def resource_init_success(execution_plan, log_manager, resource_init_times):
    '''Emit a DagsterEvent marking that every resource finished initializing,
    attaching one text metadata entry per resource with its init duration.'''
    from dagster.core.execution.plan.plan import ExecutionPlan

    metadata_entries = []
    for resource_name, resource_time in resource_init_times.items():
        metadata_entries.append(
            EventMetadataEntry.text(
                'Initialized in {}.'.format(resource_time), resource_name
            )
        )
    return DagsterEvent.from_resource(
        execution_plan=check.inst_param(execution_plan, 'execution_plan', ExecutionPlan),
        log_manager=check.inst_param(log_manager, 'log_manager', DagsterLogManager),
        message='Finished initialization of resources [{}].'.format(
            ', '.join(resource_init_times.keys())
        ),
        event_specific_data=EngineEventData(
            metadata_entries=metadata_entries, marker_end='resources',
        ),
    )
def _file_passes(_info, df):
    '''Validate df against the expectations config stored at file_path and
    wrap the ge validation result dict in an ExpectationResult.'''
    with open(file_path) as ff:
        expt_config = json.load(ff)
    # ge coerces types on the very dataframe instance it wraps, and a ge
    # dataset cannot itself be copied (AttributeError: 'PandasDataset' object
    # has no attribute 'discard_subset_failing_expectations'), so deep-copy
    # the raw dataframe before handing it to ge.
    # See https://github.com/great-expectations/great_expectations/issues/342
    ge_df = ge.from_pandas(copy.deepcopy(df), expectations_config=expt_config)
    validate_result = ge_df.validate()
    check.invariant('success' in validate_result)
    check.invariant('results' in validate_result)
    return ExpectationResult(
        success=validate_result['success'],
        metadata_entries=[
            EventMetadataEntry.json(label='result', data=validate_result)
        ],
    )
def interrupted(steps_interrupted):
    '''Build EngineEventData recording which step keys were interrupted.'''
    check.list_param(steps_interrupted, 'steps_interrupted', str)
    entry = EventMetadataEntry.text(str(steps_interrupted), 'steps_interrupted')
    return EngineEventData(metadata_entries=[entry])
def object_store_operation(step_context, object_store_operation_result):
    """Emit an OBJECT_STORE_OPERATION DagsterEvent describing a set/get/copy
    performed against an object store, with a human-readable message."""
    from dagster.core.definitions.events import ObjectStoreOperation

    # inst_param returns its argument, so bind a short alias in one step.
    result = check.inst_param(
        object_store_operation_result, "object_store_operation_result", ObjectStoreOperation
    )

    object_store_name = (
        "{object_store_name} ".format(object_store_name=result.object_store_name)
        if result.object_store_name
        else ""
    )
    serialization_strategy_modifier = (
        " using {serialization_strategy_name}".format(
            serialization_strategy_name=result.serialization_strategy_name
        )
        if result.serialization_strategy_name
        else ""
    )
    value_name = result.value_name

    # Resolve the op enum once instead of per-branch.
    op = ObjectStoreOperationType(result.op)
    if op == ObjectStoreOperationType.SET_OBJECT:
        message = (
            "Stored intermediate object for output {value_name} in "
            "{object_store_name}object store{serialization_strategy_modifier}."
        ).format(
            value_name=value_name,
            object_store_name=object_store_name,
            serialization_strategy_modifier=serialization_strategy_modifier,
        )
    elif op == ObjectStoreOperationType.GET_OBJECT:
        message = (
            "Retrieved intermediate object for input {value_name} in "
            "{object_store_name}object store{serialization_strategy_modifier}."
        ).format(
            value_name=value_name,
            object_store_name=object_store_name,
            serialization_strategy_modifier=serialization_strategy_modifier,
        )
    elif op == ObjectStoreOperationType.CP_OBJECT:
        message = (
            "Copied intermediate object for input {value_name} from {key} to {dest_key}"
        ).format(
            value_name=value_name,
            key=result.key,
            dest_key=result.dest_key,
        )
    else:
        message = ""

    return DagsterEvent.from_step(
        DagsterEventType.OBJECT_STORE_OPERATION,
        step_context,
        event_specific_data=ObjectStoreOperationResultData(
            op=result.op,
            value_name=value_name,
            address=result.key,
            metadata_entries=[
                EventMetadataEntry.path(result.key, label="key")
            ],
            version=result.version,
            mapping_key=result.mapping_key,
        ),
        message=message,
    )