def __new__(cls, field_name):
    """Build the error-data record, requiring ``field_name`` to pass ``check.str_param``."""
    checked_field_name = check.str_param(field_name, "field_name")
    return super(FieldNotDefinedErrorData, cls).__new__(cls, checked_field_name)
def __init__(self, mode, selector):
    """Record the missing mode and build the human-readable error message.

    Args:
        mode: Name of the mode that was requested; validated with ``check.str_param``.
        selector: Object whose ``name`` attribute identifies the pipeline.
    """
    self.mode = check.str_param(mode, 'mode')
    template = 'Mode {mode} not found in pipeline {pipeline}.'
    self.message = template.format(mode=mode, pipeline=selector.name)
def __init__(self, partition_set_name):
    """Store the name of the partition set that could not be found and its message.

    Args:
        partition_set_name: Name that was looked up; validated with ``check.str_param``.
    """
    super(DauphinPartitionSetNotFoundError, self).__init__()
    self.partition_set_name = check.str_param(partition_set_name, 'partition_set_name')
    template = 'Partition set {partition_set_name} could not be found.'
    self.message = template.format(partition_set_name=self.partition_set_name)
def get_step_by_key(self, key):
    """Return the entry stored under ``key`` in ``self.step_dict``.

    Raises:
        KeyError: if ``key`` is not present in ``self.step_dict``.
    """
    step_key = check.str_param(key, "key")
    return self.step_dict[step_key]
def __init__(self, message, pipeline):
    """Attach the error message and the offending pipeline to this error object.

    Args:
        message: Description of the problem; validated with ``check.str_param``.
        pipeline: Stored as-is on ``self.pipeline`` (no validation is performed here).
    """
    super(DauphinInvalidSubsetError, self).__init__()
    checked_message = check.str_param(message, 'message')
    self.message = checked_message
    self.pipeline = pipeline
def __new__(cls, solid_name, output_name):
    """Create an output-handle snapshot from two names, each checked with ``check.str_param``."""
    # Validate in declaration order so the first bad argument is the one reported.
    checked_solid_name = check.str_param(solid_name, "solid_name")
    checked_output_name = check.str_param(output_name, "output_name")
    return super(OutputHandleSnap, cls).__new__(
        cls,
        solid_name=checked_solid_name,
        output_name=checked_output_name,
    )
def get_step_input(
    plan_builder, solid, input_name, input_def, dependency_structure, handle, parent_step_inputs
):
    """Work out where one solid input gets its value and describe it as a ``StepInput``.

    Sources are tried in this order and the first match wins:

    1. the ``inputs`` section of the solid's config (looked up by ``str(handle)``),
    2. a single upstream output,
    3. several upstream outputs,
    4. an input of the enclosing container that is mapped onto this input,
    5. a default value declared on the solid definition.

    Args:
        plan_builder (_PlanBuilder): Supplies the environment config, output handles
            and the pipeline name used in the error message.
        solid (Solid): The solid owning the input.
        input_name (str): Name of the input to resolve.
        input_def (InputDefinition): Definition of that input.
        dependency_structure (DependencyStructure): Queried for upstream dependencies.
        handle (Optional[SolidHandle]): Handle whose string form keys the solid config.
        parent_step_inputs (Optional[List[StepInput]]): Inputs already resolved for the
            enclosing container; ``None`` is treated as an empty list.

    Returns:
        Optional[StepInput]: The resolved input, or ``None`` when the input is
        unconnected and its type kind is ``DagsterTypeKind.NOTHING``.

    Raises:
        DagsterInvariantViolationError: if no source provides a value and the input
            is not of kind ``NOTHING``.
    """
    check.inst_param(plan_builder, "plan_builder", _PlanBuilder)
    check.inst_param(solid, "solid", Solid)
    check.str_param(input_name, "input_name")
    check.inst_param(input_def, "input_def", InputDefinition)
    check.inst_param(dependency_structure, "dependency_structure", DependencyStructure)
    check.opt_inst_param(handle, "handle", SolidHandle)
    # Keep the normalized value: the container-mapping branch below iterates this
    # list, which would fail on ``None`` if the result were discarded.
    parent_step_inputs = check.opt_list_param(
        parent_step_inputs, "parent_step_inputs", of_type=StepInput
    )

    # 1. Value supplied directly in the solid's config.
    solid_config = plan_builder.environment_config.solids.get(str(handle))
    if solid_config and input_name in solid_config.inputs:
        return StepInput(
            name=input_name,
            dagster_type=input_def.dagster_type,
            source_type=StepInputSourceType.CONFIG,
            config_data=solid_config.inputs[input_name],
        )

    input_handle = solid.input_handle(input_name)

    # 2. Exactly one upstream output feeds this input.
    if dependency_structure.has_singular_dep(input_handle):
        solid_output_handle = dependency_structure.get_singular_dep(input_handle)
        return StepInput(
            name=input_name,
            dagster_type=input_def.dagster_type,
            source_type=StepInputSourceType.SINGLE_OUTPUT,
            source_handles=[plan_builder.get_output_handle(solid_output_handle)],
        )

    # 3. Several upstream outputs feed this input.
    if dependency_structure.has_multi_deps(input_handle):
        solid_output_handles = dependency_structure.get_multi_deps(input_handle)
        return StepInput(
            name=input_name,
            dagster_type=input_def.dagster_type,
            source_type=StepInputSourceType.MULTIPLE_OUTPUTS,
            source_handles=[
                plan_builder.get_output_handle(solid_output_handle)
                for solid_output_handle in solid_output_handles
            ],
        )

    # 4. The enclosing container maps one of its own inputs onto this one; reuse
    #    the source already resolved for that parent input, if there is one.
    if solid.container_maps_input(input_name):
        parent_name = solid.container_mapped_input(input_name).definition.name
        parent_inputs = {step_input.name: step_input for step_input in parent_step_inputs}
        if parent_name in parent_inputs:
            parent_input = parent_inputs[parent_name]
            return StepInput(
                name=input_name,
                dagster_type=input_def.dagster_type,
                source_type=parent_input.source_type,
                source_handles=parent_input.source_handles,
                config_data=parent_input.config_data,
            )

    # 5. Fall back to the default declared on the solid definition.
    if solid.definition.input_has_default(input_name):
        return StepInput(
            name=input_name,
            dagster_type=input_def.dagster_type,
            source_type=StepInputSourceType.DEFAULT_VALUE,
            config_data=solid.definition.default_value_for_input(input_name),
        )

    # At this point we have an input that is not hooked up to
    # the output of another solid or provided via environment config.

    # We will allow this for "Nothing" type inputs and continue.
    if input_def.dagster_type.kind == DagsterTypeKind.NOTHING:
        return None

    # Otherwise we throw an error.
    raise DagsterInvariantViolationError(
        (
            "In pipeline {pipeline_name} solid {solid_name}, input {input_name} "
            "must get a value either (a) from a dependency or (b) from the "
            "inputs section of its configuration."
        ).format(
            pipeline_name=plan_builder.pipeline_name,
            solid_name=solid.name,
            input_name=input_name,
        )
    )
def validate_solid_fn(decorator_name, fn_name, compute_fn, input_defs, expected_positionals=None,
                      exclude_nothing=True):
    """Check that a decorated compute function's parameters line up with its input defs.

    Args:
        decorator_name (str): Decorator name, used only in error messages.
        fn_name (str): Name of the decorated function / solid, used only in error messages.
        compute_fn (callable): The decorated function whose signature is inspected.
        input_defs (List[InputDefinition]): Inputs declared for the solid.
        expected_positionals (Optional[List[str]]): Positional parameters the function
            must declare; ``None`` is normalized by ``check.opt_list_param``.
        exclude_nothing (bool): When true, inputs whose ``runtime_type.is_nothing`` is
            set are not expected as function parameters.

    Returns:
        The result of ``positional_arg_name_list`` applied to the function's
        non-positional input arguments.

    Raises:
        DagsterInvalidDefinitionError: if a required positional parameter is missing,
            the function declares a vararg, declares a parameter that is not an input
            (or is a ``Nothing`` input), or lacks a parameter for a declared input.
    """
    check.str_param(decorator_name, 'decorator_name')
    check.str_param(fn_name, 'fn_name')
    check.callable_param(compute_fn, 'compute_fn')
    check.list_param(input_defs, 'input_defs', of_type=InputDefinition)
    expected_positionals = check.opt_list_param(
        expected_positionals, 'expected_positionals', of_type=str
    )

    if exclude_nothing:
        names = {inp.name for inp in input_defs if not inp.runtime_type.is_nothing}
        nothing_names = {inp.name for inp in input_defs if inp.runtime_type.is_nothing}
    else:
        names = {inp.name for inp in input_defs}
        nothing_names = set()

    # Currently being super strict about naming. Might be a good idea to relax. Starting strict.
    fn_positionals, input_args = split_function_parameters(compute_fn, expected_positionals)

    # Validate Positional Parameters
    missing_positional = validate_decorated_fn_positionals(fn_positionals, expected_positionals)
    if missing_positional:
        raise DagsterInvalidDefinitionError(
            "{decorator_name} '{solid_name}' decorated function does not have required positional "
            "parameter '{missing_param}'. Solid functions should only have keyword arguments "
            "that match input names and a first positional parameter named 'context'.".format(
                decorator_name=decorator_name,
                solid_name=fn_name,
                missing_param=missing_positional,
            )
        )

    # Validate non positional parameters
    invalid_function_info = validate_decorated_fn_input_args(names, input_args)
    if invalid_function_info:
        error_type = invalid_function_info.error_type
        error_types = InvalidDecoratedFunctionInfo.TYPES

        if error_type == error_types['vararg']:
            raise DagsterInvalidDefinitionError(
                "{decorator_name} '{solid_name}' decorated function has positional vararg parameter "
                "'{param}'. Solid functions should only have keyword arguments that match "
                "input names and a first positional parameter named 'context'.".format(
                    decorator_name=decorator_name,
                    solid_name=fn_name,
                    param=invalid_function_info.param,
                )
            )
        elif error_type == error_types['missing_name']:
            if invalid_function_info.param in nothing_names:
                raise DagsterInvalidDefinitionError(
                    "{decorator_name} '{solid_name}' decorated function has parameter '{param}' that is "
                    "one of the solid input_defs of type 'Nothing' which should not be included since "
                    "no data will be passed for it. ".format(
                        decorator_name=decorator_name,
                        solid_name=fn_name,
                        param=invalid_function_info.param,
                    )
                )
            else:
                raise DagsterInvalidDefinitionError(
                    "{decorator_name} '{solid_name}' decorated function has parameter '{param}' that is not "
                    "one of the solid input_defs. Solid functions should only have keyword arguments "
                    "that match input names and a first positional parameter named 'context'.".format(
                        decorator_name=decorator_name,
                        solid_name=fn_name,
                        param=invalid_function_info.param,
                    )
                )
        elif error_type == error_types['extra']:
            # The template wraps this value in single quotes, so the separator must
            # close and reopen a quote to render 'a', 'b' rather than 'a, 'b'.
            undeclared_inputs_printed = "', '".join(invalid_function_info.missing_names)
            raise DagsterInvalidDefinitionError(
                "{decorator_name} '{solid_name}' decorated function does not have parameter(s) "
                "'{undeclared_inputs_printed}', which are in solid's input_defs. Solid functions "
                "should only have keyword arguments that match input names and a first positional "
                "parameter named 'context'.".format(
                    decorator_name=decorator_name,
                    solid_name=fn_name,
                    undeclared_inputs_printed=undeclared_inputs_printed,
                )
            )

    return positional_arg_name_list(input_args)
def monthly_schedule(
    pipeline_name,
    start_date,
    name=None,
    execution_day_of_month=1,
    execution_time=datetime.time(0, 0),
    tags_fn_for_date=None,
    solid_subset=None,
    mode="default",
    should_execute=None,
    environment_vars=None,
):
    """Return a decorator that turns a function into a monthly, partition-based schedule.

    The decorated function is called with ``partition.value`` and its result is used
    as the environment dict for that partition. Partitions come from
    ``date_partition_range`` with a one-month delta and the ``"%Y-%m"`` format.

    Args:
        pipeline_name (str): Pipeline the schedule runs.
        start_date (datetime.datetime): First date of the partition range.
        name (Optional[str]): Schedule name; defaults to the decorated function's name.
        execution_day_of_month (int): Day field of the cron expression, 1 through 31.
        execution_time (datetime.time): Supplies the cron hour and minute.
        tags_fn_for_date (Optional[callable]): Called with ``partition.value`` to
            produce tags; when omitted every partition gets an empty dict.
        solid_subset (Optional[List[str]]): Validated only.
        mode (Optional[str]): Mode passed to the partition set.
        should_execute (Optional[callable]): Forwarded to the schedule definition.
        environment_vars (Optional[Dict[str, str]]): Forwarded to the schedule definition.

    Returns:
        callable: A decorator producing the schedule definition.

    Raises:
        DagsterInvalidDefinitionError: if ``execution_day_of_month`` is outside 1..31.
    """
    check.opt_str_param(name, 'name')
    check.inst_param(start_date, 'start_date', datetime.datetime)
    check.opt_callable_param(tags_fn_for_date, 'tags_fn_for_date')
    # NOTE(review): solid_subset is validated but never forwarded to the partition
    # set or schedule below -- confirm whether that is intentional.
    check.opt_nullable_list_param(solid_subset, 'solid_subset', of_type=str)
    mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)
    check.opt_callable_param(should_execute, 'should_execute')
    check.opt_dict_param(environment_vars, 'environment_vars', key_type=str, value_type=str)
    check.str_param(pipeline_name, 'pipeline_name')
    # Label the check with the real parameter name so a type error points at the
    # argument the caller actually passed.
    check.int_param(execution_day_of_month, 'execution_day_of_month')
    check.inst_param(execution_time, 'execution_time', datetime.time)

    if execution_day_of_month <= 0 or execution_day_of_month > 31:
        raise DagsterInvalidDefinitionError(
            "`execution_day_of_month={}` is not valid for monthly schedule. Execution day must be between 1 and 31"
            .format(execution_day_of_month))

    cron_schedule = '{minute} {hour} {day} * *'.format(
        minute=execution_time.minute,
        hour=execution_time.hour,
        day=execution_day_of_month,
    )

    partition_fn = date_partition_range(start_date, delta=relativedelta(months=1), fmt="%Y-%m")

    def inner(fn):
        check.callable_param(fn, 'fn')

        schedule_name = name or fn.__name__

        if tags_fn_for_date:
            tags_fn_for_partition_value = lambda partition: tags_fn_for_date(partition.value)
        else:
            tags_fn_for_partition_value = lambda partition: {}

        partition_set = PartitionSetDefinition(
            name='{}_monthly'.format(pipeline_name),
            pipeline_name=pipeline_name,
            partition_fn=partition_fn,
            environment_dict_fn_for_partition=lambda partition: fn(partition.value),
            tags_fn_for_partition=tags_fn_for_partition_value,
            mode=mode,
        )

        return partition_set.create_schedule_definition(
            schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
        )

    return inner
def __new__(cls, output_name, manager_key):
    """Create the record from an output name and manager key, both checked as strings."""
    # Validate in declaration order so the first bad argument is the one reported.
    checked_output_name = check.str_param(output_name, "output_name")
    checked_manager_key = check.str_param(manager_key, "manager_key")
    return super(HandledOutputData, cls).__new__(
        cls,
        output_name=checked_output_name,
        manager_key=checked_manager_key,
    )
def hourly_schedule(
    pipeline_name,
    start_date,
    name=None,
    execution_time=datetime.time(0, 0),
    tags_fn_for_date=None,
    solid_subset=None,
    mode="default",
    should_execute=None,
    environment_vars=None,
):
    """Return a decorator that turns a function into an hourly, partition-based schedule.

    The decorated function is called with ``partition.value`` and its result is used
    as the environment dict for that partition. Partitions come from
    ``date_partition_range`` with a one-hour delta and the ``"%Y-%m-%d-%H:%M"`` format.
    Only the minute of ``execution_time`` reaches the cron expression; a non-zero
    hour triggers a warning.

    Args:
        pipeline_name (str): Pipeline the schedule runs.
        start_date (datetime.datetime): First date of the partition range.
        name (Optional[str]): Schedule name; defaults to the decorated function's name.
        execution_time (datetime.time): Supplies the cron minute.
        tags_fn_for_date (Optional[callable]): Called with ``partition.value`` to
            produce tags; when omitted every partition gets an empty dict.
        solid_subset (Optional[List[str]]): Validated only.
        mode (Optional[str]): Mode passed to the partition set.
        should_execute (Optional[callable]): Forwarded to the schedule definition.
        environment_vars (Optional[Dict[str, str]]): Forwarded to the schedule definition.

    Returns:
        callable: A decorator producing the schedule definition.
    """
    check.opt_str_param(name, 'name')
    check.inst_param(start_date, 'start_date', datetime.datetime)
    check.opt_callable_param(tags_fn_for_date, 'tags_fn_for_date')
    # NOTE(review): solid_subset is validated but never forwarded to the partition
    # set or schedule below -- confirm whether that is intentional.
    check.opt_nullable_list_param(solid_subset, 'solid_subset', of_type=str)
    mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)
    check.opt_callable_param(should_execute, 'should_execute')
    check.opt_dict_param(environment_vars, 'environment_vars', key_type=str, value_type=str)
    check.str_param(pipeline_name, 'pipeline_name')
    check.inst_param(execution_time, 'execution_time', datetime.time)

    if execution_time.hour != 0:
        # The decorated function is not known yet, so when no explicit name was
        # given the schedule is identified by its pipeline instead.
        if name:
            schedule_label = name
        else:
            schedule_label = "for pipeline '{}'".format(pipeline_name)
        warnings.warn(
            "Hourly schedule {schedule_name} created with:\n"
            "\texecution_time=datetime.time(hour={hour}, minute={minute}, ...). "
            "Since this is a hourly schedule, the hour parameter will be ignored and the schedule "
            "will run on the {minute} mark for the previous hour interval. Replace "
            "datetime.time(hour={hour}, minute={minute}, ...) with datetime.time(minute={minute}, ...) "
            "to fix this warning.".format(
                schedule_name=schedule_label,
                hour=execution_time.hour,
                minute=execution_time.minute,
            )
        )

    cron_schedule = '{minute} * * * *'.format(minute=execution_time.minute)

    partition_fn = date_partition_range(
        start_date, delta=datetime.timedelta(hours=1), fmt="%Y-%m-%d-%H:%M"
    )

    def inner(fn):
        check.callable_param(fn, 'fn')

        schedule_name = name or fn.__name__

        if tags_fn_for_date:
            tags_fn_for_partition_value = lambda partition: tags_fn_for_date(partition.value)
        else:
            tags_fn_for_partition_value = lambda partition: {}

        partition_set = PartitionSetDefinition(
            name='{}_hourly'.format(pipeline_name),
            pipeline_name=pipeline_name,
            partition_fn=partition_fn,
            environment_dict_fn_for_partition=lambda partition: fn(partition.value),
            tags_fn_for_partition=tags_fn_for_partition_value,
            mode=mode,
        )

        return partition_set.create_schedule_definition(
            schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
        )

    return inner
def docker_service_up(docker_compose_file, service_name, conn_args=None):
    """Start a docker-compose service, yield a Postgres connection string, then tear it down.

    This is a generator that yields exactly once.
    NOTE(review): presumably wrapped by a context-manager or fixture decorator that
    is not visible in this block -- confirm at the definition site.

    When ``BUILDKITE`` is truthy no container is managed here; the connection string
    is yielded directly.

    Args:
        docker_compose_file (str): Path to an existing docker-compose file.
        service_name (str): Service within that file to start.
        conn_args (Optional[dict]): Keyword arguments for
            ``TestPostgresInstance.conn_string``; a falsy value means none.

    Yields:
        The connection string built by ``TestPostgresInstance.conn_string``.

    Raises:
        PostgresDockerError: if ``docker-compose up`` exits with an error.
    """
    check.invariant(
        TestPostgresInstance.dagster_postgres_installed(),
        "dagster_postgres must be installed to test with postgres",
    )
    check.str_param(service_name, "service_name")
    check.str_param(docker_compose_file, "docker_compose_file")
    check.invariant(
        os.path.isfile(docker_compose_file), "docker_compose_file must specify a valid file"
    )

    conn_args = check.opt_dict_param(conn_args, "conn_args") if conn_args else {}

    from dagster_postgres.utils import wait_for_connection  # pylint: disable=import-error

    if BUILDKITE:
        yield TestPostgresInstance.conn_string(**conn_args)  # buildkite docker is handled in pipeline setup
        return

    def _stop_and_remove():
        # Best effort: a failing stop/rm (e.g. the service is not running) is ignored.
        try:
            subprocess.check_output(
                ["docker-compose", "-f", docker_compose_file, "stop", service_name]
            )
            subprocess.check_output(
                ["docker-compose", "-f", docker_compose_file, "rm", "-f", service_name]
            )
        except subprocess.CalledProcessError:
            pass

    # Clear out any container left over from a previous run before starting fresh.
    _stop_and_remove()

    try:
        subprocess.check_output(
            ["docker-compose", "-f", docker_compose_file, "up", "-d", service_name],
            stderr=subprocess.STDOUT,  # capture STDERR for error handling
        )
    except subprocess.CalledProcessError as ex:
        err_text = ex.output.decode()
        raise PostgresDockerError(
            "Failed to launch docker container(s) via docker-compose: {}".format(err_text),
            ex,
        )

    # From here on the container is up, so it must be torn down even when waiting
    # for the connection fails or the consumer of the yielded value raises.
    try:
        conn_str = TestPostgresInstance.conn_string(**conn_args)
        wait_for_connection(conn_str, retry_limit=10, retry_wait=3)
        yield conn_str
    finally:
        _stop_and_remove()
def __new__(cls, config_type_snap, value_rep):
    """Create the error-data record from a config type snapshot and a value representation.

    Args:
        config_type_snap (ConfigTypeSnap): Snapshot of the expected config type.
        value_rep (str): String representation of the mismatched value.
    """
    return super(RuntimeMismatchErrorData, cls).__new__(
        cls,
        # Label the check with the real parameter name so a type error points at
        # the argument the caller actually passed.
        check.inst_param(config_type_snap, "config_type_snap", ConfigTypeSnap),
        check.str_param(value_rep, "value_rep"),
    )
def __new__(cls, field_name, field_snap):
    """Create the record from a field name (string) and its ``ConfigFieldSnap``."""
    # Validate in declaration order so the first bad argument is the one reported.
    checked_field_name = check.str_param(field_name, "field_name")
    checked_field_snap = check.inst_param(field_snap, "field_snap", ConfigFieldSnap)
    return super(MissingFieldErrorData, cls).__new__(cls, checked_field_name, checked_field_snap)
def get_invocation(self, solid_name):
    """Return the entry stored under ``solid_name`` in ``self._invocations_dict``.

    Raises:
        KeyError: if ``solid_name`` is not present in ``self._invocations_dict``.
    """
    invocation_key = check.str_param(solid_name, "solid_name")
    return self._invocations_dict[invocation_key]
def __new__(cls, step_key, output_name='result'):
    """Create a handle for one output of a step; ``output_name`` defaults to ``'result'``."""
    # Validate in declaration order so the first bad argument is the one reported.
    checked_step_key = check.str_param(step_key, 'step_key')
    checked_output_name = check.str_param(output_name, 'output_name')
    return super(StepOutputHandle, cls).__new__(
        cls,
        step_key=checked_step_key,
        output_name=checked_output_name,
    )
def get_downstream_inputs(self, solid_name, output_name):
    """Look up ``self._output_to_upstream_index[solid_name][output_name]``.

    Raises:
        KeyError: if either key is missing from the index.
    """
    check.str_param(solid_name, "solid_name")
    check.str_param(output_name, "output_name")
    outputs_for_solid = self._output_to_upstream_index[solid_name]
    return outputs_for_solid[output_name]
def __init__(self, *args, **kwargs):
    """Pull the required ``invalid_run_id`` keyword off ``kwargs`` and pass the rest up.

    Raises:
        KeyError: if ``invalid_run_id`` was not supplied as a keyword argument.
    """
    run_id = kwargs.pop("invalid_run_id")
    self.invalid_run_id = check.str_param(run_id, "invalid_run_id")
    super(DagsterRunNotFoundError, self).__init__(*args, **kwargs)
def validate_solid_fn(
    decorator_name: str,
    fn_name: str,
    compute_fn: Callable[..., Any],
    input_defs: List[InputDefinition],
    expected_positionals: Optional[List[str]] = None,
    exclude_nothing: Optional[bool] = True,
) -> List[str]:
    """Check that a decorated compute function's parameters line up with its input defs.

    Args:
        decorator_name: Decorator name, used only in error messages.
        fn_name: Name of the decorated function / solid, used only in error messages.
        compute_fn: The decorated function whose signature is inspected.
        input_defs: Inputs declared for the solid.
        expected_positionals: Positional parameters the function must declare;
            ``None`` is normalized by ``check.opt_list_param``.
        exclude_nothing: When true, inputs whose type kind is
            ``DagsterTypeKind.NOTHING`` are not expected as function parameters.

    Returns:
        The result of ``positional_arg_name_list`` applied to the function's
        non-positional input arguments.

    Raises:
        DagsterInvalidDefinitionError: if a required positional parameter is missing,
            the function declares a vararg, declares a parameter that is not an input
            (or is a ``Nothing`` input), or lacks a parameter for a declared input.
    """
    check.str_param(decorator_name, "decorator_name")
    check.str_param(fn_name, "fn_name")
    check.callable_param(compute_fn, "compute_fn")
    check.list_param(input_defs, "input_defs", of_type=InputDefinition)
    expected_positionals = check.opt_list_param(
        expected_positionals, "expected_positionals", of_type=str
    )

    if exclude_nothing:
        names = {
            inp.name for inp in input_defs if not inp.dagster_type.kind == DagsterTypeKind.NOTHING
        }
        nothing_names = {
            inp.name for inp in input_defs if inp.dagster_type.kind == DagsterTypeKind.NOTHING
        }
    else:
        names = {inp.name for inp in input_defs}
        nothing_names = set()

    # Currently being super strict about naming. Might be a good idea to relax. Starting strict.
    fn_positionals, input_args = split_function_parameters(compute_fn, expected_positionals)

    # Validate Positional Parameters
    missing_positional = validate_decorated_fn_positionals(fn_positionals, expected_positionals)
    if missing_positional:
        raise DagsterInvalidDefinitionError(
            "{decorator_name} '{solid_name}' decorated function does not have required positional "
            "parameter '{missing_param}'. Solid functions should only have keyword arguments "
            "that match input names and a first positional parameter named 'context'.".format(
                decorator_name=decorator_name,
                solid_name=fn_name,
                missing_param=missing_positional,
            )
        )

    # Validate non positional parameters
    invalid_function_info = validate_decorated_fn_input_args(names, input_args)
    if invalid_function_info:
        error_type = invalid_function_info.error_type
        error_types = InvalidDecoratedFunctionInfo.TYPES

        if error_type == error_types["vararg"]:
            raise DagsterInvalidDefinitionError(
                "{decorator_name} '{solid_name}' decorated function has positional vararg parameter "
                "'{param}'. Solid functions should only have keyword arguments that match "
                "input names and a first positional parameter named 'context'.".format(
                    decorator_name=decorator_name,
                    solid_name=fn_name,
                    param=invalid_function_info.param,
                )
            )
        elif error_type == error_types["missing_name"]:
            if invalid_function_info.param in nothing_names:
                raise DagsterInvalidDefinitionError(
                    "{decorator_name} '{solid_name}' decorated function has parameter '{param}' that is "
                    "one of the solid input_defs of type 'Nothing' which should not be included since "
                    "no data will be passed for it. ".format(
                        decorator_name=decorator_name,
                        solid_name=fn_name,
                        param=invalid_function_info.param,
                    )
                )
            else:
                raise DagsterInvalidDefinitionError(
                    "{decorator_name} '{solid_name}' decorated function has parameter '{param}' that is not "
                    "one of the solid input_defs. Solid functions should only have keyword arguments "
                    "that match input names and a first positional parameter named 'context'.".format(
                        decorator_name=decorator_name,
                        solid_name=fn_name,
                        param=invalid_function_info.param,
                    )
                )
        elif error_type == error_types["extra"]:
            # The template wraps this value in single quotes, so the separator must
            # close and reopen a quote to render 'a', 'b' rather than 'a, 'b'.
            undeclared_inputs_printed = "', '".join(invalid_function_info.missing_names)
            raise DagsterInvalidDefinitionError(
                "{decorator_name} '{solid_name}' decorated function does not have parameter(s) "
                "'{undeclared_inputs_printed}', which are in solid's input_defs. Solid functions "
                "should only have keyword arguments that match input names and a first positional "
                "parameter named 'context'.".format(
                    decorator_name=decorator_name,
                    solid_name=fn_name,
                    undeclared_inputs_printed=undeclared_inputs_printed,
                )
            )

    return positional_arg_name_list(input_args)
def __init__(self, *args, **kwargs):
    """Pull the required ``step_key`` and ``output_name`` keywords off ``kwargs``.

    Each value is checked as a string and stored on the instance under the same
    name; the remaining arguments go to the parent initializer.

    Raises:
        KeyError: if either keyword argument was not supplied.
    """
    # Pop and validate one field at a time, in this order.
    for required_kwarg in ("step_key", "output_name"):
        setattr(self, required_kwarg, check.str_param(kwargs.pop(required_kwarg), required_kwarg))
    super(DagsterStepOutputNotFoundError, self).__init__(*args, **kwargs)
def has_step(self, key):
    """Report whether ``key`` is present in ``self.step_dict``."""
    step_key = check.str_param(key, "key")
    return step_key in self.step_dict
def __init__(self, *args, **kwargs):
    """Pull the required ``step_key`` and ``input_name`` keywords off ``kwargs``.

    Each value is checked as a string and stored on the instance under the same
    name; the remaining arguments go to the parent initializer.

    Raises:
        KeyError: if either keyword argument was not supplied.
    """
    # Pop and validate one field at a time, in this order.
    for required_kwarg in ("step_key", "input_name"):
        setattr(self, required_kwarg, check.str_param(kwargs.pop(required_kwarg), required_kwarg))
    super(DagsterExecutionLoadInputError, self).__init__(*args, **kwargs)
def __init__(self, message):
    """Store ``message`` on the error object after checking that it is a string."""
    super(DauphinInvalidPipelineRunsFilterError, self).__init__()
    checked_message = check.str_param(message, 'message')
    self.message = checked_message
def __init__(self, *args, **kwargs):
    """Pull the required ``step_key`` and ``output_name`` keywords off ``kwargs``.

    Each value is checked as a string and stored on the instance under the same
    name; the remaining arguments go to the parent initializer.

    Raises:
        KeyError: if either keyword argument was not supplied.
    """
    # Pop and validate one field at a time, in this order.
    for required_kwarg in ("step_key", "output_name"):
        setattr(self, required_kwarg, check.str_param(kwargs.pop(required_kwarg), required_kwarg))
    super(DagsterExecutionHandleOutputError, self).__init__(*args, **kwargs)
def __init__(self, preset, selector):
    """Record the missing preset and build the human-readable error message.

    Args:
        preset: Name of the preset that was requested; validated with ``check.str_param``.
        selector: Object whose ``name`` attribute identifies the pipeline.
    """
    self.preset = check.str_param(preset, 'preset')
    template = 'Preset {preset} not found in pipeline {pipeline}.'
    self.message = template.format(preset=preset, pipeline=selector.name)
def __init__(self, *args, **kwargs):
    """Pull ``step_key``, ``solid_name`` and ``solid_def_name`` off ``kwargs``.

    Each value is checked as a string and stored on the instance under the same
    name; the remaining arguments go to the parent initializer.

    Raises:
        KeyError: if any of the three keyword arguments was not supplied.
    """
    # Pop and validate one field at a time, in this order.
    for required_kwarg in ("step_key", "solid_name", "solid_def_name"):
        setattr(self, required_kwarg, check.str_param(kwargs.pop(required_kwarg), required_kwarg))
    super(DagsterExecutionStepExecutionError, self).__init__(*args, **kwargs)
def __init__(self, field_name):
    """Store the string-checked ``field_name`` privately, then run the parent initializer."""
    checked_field_name = check.str_param(field_name, 'field_name')
    self._field_name = checked_field_name
    super(DauphinEvaluationStackPathEntry, self).__init__()
def __init__(self, run_id):
    """Record the run id and initialize the parent with a message naming that run."""
    self.run_id = check.str_param(run_id, "run_id")
    error_message = "Event logs invalid for run id {}".format(run_id)
    super(DagsterEventLogInvalidForRun, self).__init__(error_message)
def __init__(self, schedule_name):
    """Store the name of the schedule that was not found and build its message.

    Args:
        schedule_name: Name that was looked up; validated with ``check.str_param``.
    """
    super(DauphinScheduleNotFoundError, self).__init__()
    self.schedule_name = check.str_param(schedule_name, 'schedule_name')
    template = 'Schedule {schedule_name} is not present in the currently loaded repository.'
    self.message = template.format(schedule_name=schedule_name)
def __new__(cls, location_name, repository_name):
    """Create a selector from a location name and a repository name, both checked as strings."""
    # Validate in declaration order so the first bad argument is the one reported.
    checked_location_name = check.str_param(location_name, "location_name")
    checked_repository_name = check.str_param(repository_name, "repository_name")
    return super(RepositorySelector, cls).__new__(
        cls,
        location_name=checked_location_name,
        repository_name=checked_repository_name,
    )