Exemple #1
0
 def __new__(cls, field_name):
     """Create the instance once ``field_name`` has passed the string check."""
     checked_field_name = check.str_param(field_name, "field_name")
     return super(FieldNotDefinedErrorData, cls).__new__(cls, checked_field_name)
Exemple #2
0
 def __init__(self, mode, selector):
     """Store the requested mode and compose the "not found" message.

     ``mode`` must be a string; ``selector.name`` is used as the pipeline
     name in the message.
     """
     self.mode = check.str_param(mode, "mode")
     template = 'Mode {mode} not found in pipeline {pipeline}.'
     self.message = template.format(mode=mode, pipeline=selector.name)
Exemple #3
0
 def __init__(self, partition_set_name):
     """Store the partition set name (a string) and the matching message."""
     super(DauphinPartitionSetNotFoundError, self).__init__()
     self.partition_set_name = check.str_param(
         partition_set_name, "partition_set_name"
     )
     template = 'Partition set {partition_set_name} could not be found.'
     self.message = template.format(partition_set_name=self.partition_set_name)
Exemple #4
0
 def get_step_by_key(self, key):
     """Return the entry of ``self.step_dict`` stored under ``key``.

     ``key`` must be a string. No fallback is applied: a key that is not in
     ``step_dict`` surfaces the mapping's own lookup error.
     """
     check.str_param(key, "key")
     step = self.step_dict[key]
     return step
Exemple #5
0
 def __init__(self, message, pipeline):
     """Store the error message (must be a string) and the pipeline as given."""
     super(DauphinInvalidSubsetError, self).__init__()
     checked_message = check.str_param(message, "message")
     self.message = checked_message
     self.pipeline = pipeline
Exemple #6
0
 def __new__(cls, solid_name, output_name):
     """Create the instance from a string solid name and a string output name."""
     checked_solid_name = check.str_param(solid_name, "solid_name")
     checked_output_name = check.str_param(output_name, "output_name")
     return super(OutputHandleSnap, cls).__new__(
         cls, solid_name=checked_solid_name, output_name=checked_output_name
     )
Exemple #7
0
def get_step_input(plan_builder, solid, input_name, input_def,
                   dependency_structure, handle, parent_step_inputs):
    """Work out where the value for one solid input comes from.

    Candidate sources are tried in order; the first one that applies wins:

    1. an entry for ``input_name`` in the solid's ``inputs`` config, looked
       up under ``str(handle)`` in the environment config;
    2. a singular dependency on one upstream output;
    3. a multi dependency on several upstream outputs;
    4. a container input mapped onto this input (per
       ``solid.container_maps_input``), when a parent step input with the
       mapped name was supplied -- its source is copied;
    5. a default value declared on the solid definition.

    Args:
        plan_builder: ``_PlanBuilder`` providing the environment config,
            output-handle lookup and pipeline name.
        solid: ``Solid`` that owns the input.
        input_name: Name of the input (string).
        input_def: ``InputDefinition`` of the input.
        dependency_structure: ``DependencyStructure`` queried for upstream
            outputs.
        handle: Optional ``SolidHandle``.
        parent_step_inputs: Optional list of ``StepInput``; ``None`` is
            treated as an empty list.

    Returns:
        A ``StepInput`` describing the source, or ``None`` when no source
        applies and the input's dagster type is of kind ``NOTHING``.

    Raises:
        DagsterInvariantViolationError: no source applies and the input is
            not a ``NOTHING`` input.
    """
    check.inst_param(plan_builder, "plan_builder", _PlanBuilder)
    check.inst_param(solid, "solid", Solid)
    check.str_param(input_name, "input_name")
    check.inst_param(input_def, "input_def", InputDefinition)
    check.inst_param(dependency_structure, "dependency_structure",
                     DependencyStructure)
    check.opt_inst_param(handle, "handle", SolidHandle)
    # Keep the normalized value: the parameter is optional, and iterating a
    # raw ``None`` in the container-mapping branch below would raise TypeError.
    parent_step_inputs = check.opt_list_param(parent_step_inputs,
                                              "parent_step_inputs",
                                              of_type=StepInput)

    solid_config = plan_builder.environment_config.solids.get(str(handle))
    if solid_config and input_name in solid_config.inputs:
        return StepInput(
            name=input_name,
            dagster_type=input_def.dagster_type,
            source_type=StepInputSourceType.CONFIG,
            config_data=solid_config.inputs[input_name],
        )

    input_handle = solid.input_handle(input_name)
    if dependency_structure.has_singular_dep(input_handle):
        solid_output_handle = dependency_structure.get_singular_dep(
            input_handle)
        return StepInput(
            name=input_name,
            dagster_type=input_def.dagster_type,
            source_type=StepInputSourceType.SINGLE_OUTPUT,
            source_handles=[
                plan_builder.get_output_handle(solid_output_handle)
            ],
        )

    if dependency_structure.has_multi_deps(input_handle):
        solid_output_handles = dependency_structure.get_multi_deps(
            input_handle)
        return StepInput(
            name=input_name,
            dagster_type=input_def.dagster_type,
            source_type=StepInputSourceType.MULTIPLE_OUTPUTS,
            source_handles=[
                plan_builder.get_output_handle(solid_output_handle)
                for solid_output_handle in solid_output_handles
            ],
        )

    if solid.container_maps_input(input_name):
        parent_name = solid.container_mapped_input(input_name).definition.name
        parent_inputs = {
            step_input.name: step_input
            for step_input in parent_step_inputs
        }
        # Without a matching parent step input, fall through to the
        # default-value / Nothing / error handling below.
        if parent_name in parent_inputs:
            parent_input = parent_inputs[parent_name]
            return StepInput(
                name=input_name,
                dagster_type=input_def.dagster_type,
                source_type=parent_input.source_type,
                source_handles=parent_input.source_handles,
                config_data=parent_input.config_data,
            )

    if solid.definition.input_has_default(input_name):
        return StepInput(
            name=input_name,
            dagster_type=input_def.dagster_type,
            source_type=StepInputSourceType.DEFAULT_VALUE,
            config_data=solid.definition.default_value_for_input(input_name),
        )

    # At this point we have an input that is not hooked up to
    # the output of another solid or provided via environment config.

    # We will allow this for "Nothing" type inputs and continue.
    if input_def.dagster_type.kind == DagsterTypeKind.NOTHING:
        return None

    # Otherwise we throw an error.
    raise DagsterInvariantViolationError(
        ("In pipeline {pipeline_name} solid {solid_name}, input {input_name} "
         "must get a value either (a) from a dependency or (b) from the "
         "inputs section of its configuration.").format(
             pipeline_name=plan_builder.pipeline_name,
             solid_name=solid.name,
             input_name=input_name))
Exemple #8
0
def validate_solid_fn(decorator_name,
                      fn_name,
                      compute_fn,
                      input_defs,
                      expected_positionals=None,
                      exclude_nothing=True):
    """Check that a decorated function's parameters match its input defs.

    Args:
        decorator_name: Name of the decorator, used in error messages.
        fn_name: Name of the decorated solid, used in error messages.
        compute_fn: The decorated callable to inspect.
        input_defs: List of ``InputDefinition`` declared for the solid.
        expected_positionals: Optional list of names the function's leading
            positional parameters must have.
        exclude_nothing: When truthy, inputs whose ``runtime_type.is_nothing``
            is true are not expected as function parameters.

    Returns:
        The result of ``positional_arg_name_list`` for the function's input
        arguments.

    Raises:
        DagsterInvalidDefinitionError: a required positional parameter is
            missing, the function takes a vararg, the function has a
            parameter that is not a (non-Nothing) input, or an input has no
            matching parameter.
    """
    check.str_param(decorator_name, 'decorator_name')
    check.str_param(fn_name, 'fn_name')
    check.callable_param(compute_fn, 'compute_fn')
    check.list_param(input_defs, 'input_defs', of_type=InputDefinition)
    expected_positionals = check.opt_list_param(expected_positionals,
                                                'expected_positionals',
                                                of_type=str)
    if exclude_nothing:
        names = {
            inp.name for inp in input_defs
            if not inp.runtime_type.is_nothing
        }
        # Tracked separately so a parameter named after a Nothing input gets
        # a dedicated error message below.
        nothing_names = {
            inp.name for inp in input_defs if inp.runtime_type.is_nothing
        }
    else:
        names = {inp.name for inp in input_defs}
        nothing_names = set()

    # Currently being super strict about naming. Might be a good idea to relax. Starting strict.
    fn_positionals, input_args = split_function_parameters(
        compute_fn, expected_positionals)

    # Validate Positional Parameters
    missing_positional = validate_decorated_fn_positionals(
        fn_positionals, expected_positionals)
    if missing_positional:
        raise DagsterInvalidDefinitionError(
            "{decorator_name} '{solid_name}' decorated function does not have required positional "
            "parameter '{missing_param}'. Solid functions should only have keyword arguments "
            "that match input names and a first positional parameter named 'context'."
            .format(decorator_name=decorator_name,
                    solid_name=fn_name,
                    missing_param=missing_positional))

    # Validate non positional parameters
    invalid_function_info = validate_decorated_fn_input_args(names, input_args)
    if invalid_function_info:
        if invalid_function_info.error_type == InvalidDecoratedFunctionInfo.TYPES[
                'vararg']:
            raise DagsterInvalidDefinitionError(
                "{decorator_name} '{solid_name}' decorated function has positional vararg parameter "
                "'{param}'. Solid functions should only have keyword arguments that match "
                "input names and a first positional parameter named 'context'."
                .format(
                    decorator_name=decorator_name,
                    solid_name=fn_name,
                    param=invalid_function_info.param,
                ))
        elif invalid_function_info.error_type == InvalidDecoratedFunctionInfo.TYPES[
                'missing_name']:
            if invalid_function_info.param in nothing_names:
                raise DagsterInvalidDefinitionError(
                    "{decorator_name} '{solid_name}' decorated function has parameter '{param}' that is "
                    "one of the solid input_defs of type 'Nothing' which should not be included since "
                    "no data will be passed for it. ".format(
                        decorator_name=decorator_name,
                        solid_name=fn_name,
                        param=invalid_function_info.param,
                    ))
            else:
                raise DagsterInvalidDefinitionError(
                    "{decorator_name} '{solid_name}' decorated function has parameter '{param}' that is not "
                    "one of the solid input_defs. Solid functions should only have keyword arguments "
                    "that match input names and a first positional parameter named 'context'."
                    .format(
                        decorator_name=decorator_name,
                        solid_name=fn_name,
                        param=invalid_function_info.param,
                    ))
        elif invalid_function_info.error_type == InvalidDecoratedFunctionInfo.TYPES[
                'extra']:
            # The template wraps the joined text in single quotes, so the
            # separator has to close and reopen them to yield 'a', 'b'.
            undeclared_inputs_printed = "', '".join(
                invalid_function_info.missing_names)
            raise DagsterInvalidDefinitionError(
                "{decorator_name} '{solid_name}' decorated function does not have parameter(s) "
                "'{undeclared_inputs_printed}', which are in solid's input_defs. Solid functions "
                "should only have keyword arguments that match input names and a first positional "
                "parameter named 'context'.".format(
                    decorator_name=decorator_name,
                    solid_name=fn_name,
                    undeclared_inputs_printed=undeclared_inputs_printed,
                ))

    return positional_arg_name_list(input_args)
Exemple #9
0
def monthly_schedule(
    pipeline_name,
    start_date,
    name=None,
    execution_day_of_month=1,
    execution_time=datetime.time(0, 0),
    tags_fn_for_date=None,
    solid_subset=None,
    mode="default",
    should_execute=None,
    environment_vars=None,
):
    """Return a decorator that builds a monthly, date-partitioned schedule.

    The decorated function is called with each partition's value and its
    result is used as the environment dict for that partition.

    Args:
        pipeline_name: Name of the pipeline to schedule (string).
        start_date: ``datetime.datetime`` at which the monthly partition
            range starts.
        name: Optional schedule name; defaults to the decorated function's
            ``__name__``.
        execution_day_of_month: Day of the month (1-31) placed in the cron
            expression.
        execution_time: ``datetime.time`` whose hour and minute are placed in
            the cron expression.
        tags_fn_for_date: Optional callable given a partition's value that
            returns the tags for that partition.
        solid_subset: Optional list of strings (or ``None``).
        mode: Pipeline mode passed to the partition set.
        should_execute: Optional callable forwarded to the schedule
            definition.
        environment_vars: Optional ``str -> str`` dict forwarded to the
            schedule definition.

    Returns:
        A decorator that returns the value of
        ``PartitionSetDefinition.create_schedule_definition``.

    Raises:
        DagsterInvalidDefinitionError: ``execution_day_of_month`` is outside
            1-31.
    """
    check.opt_str_param(name, 'name')
    check.inst_param(start_date, 'start_date', datetime.datetime)
    check.opt_callable_param(tags_fn_for_date, 'tags_fn_for_date')
    # NOTE(review): solid_subset is validated but not used anywhere below --
    # confirm whether it should be forwarded to the partition set.
    check.opt_nullable_list_param(solid_subset, 'solid_subset', of_type=str)
    mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)
    check.opt_callable_param(should_execute, 'should_execute')
    check.opt_dict_param(environment_vars,
                         'environment_vars',
                         key_type=str,
                         value_type=str)
    check.str_param(pipeline_name, 'pipeline_name')
    # Label matches the real parameter name so a failed check points callers
    # at the argument they actually passed.
    check.int_param(execution_day_of_month, 'execution_day_of_month')
    check.inst_param(execution_time, 'execution_time', datetime.time)

    if execution_day_of_month <= 0 or execution_day_of_month > 31:
        raise DagsterInvalidDefinitionError(
            "`execution_day_of_month={}` is not valid for monthly schedule. Execution day must be between 1 and 31"
            .format(execution_day_of_month))

    cron_schedule = '{minute} {hour} {day} * *'.format(
        minute=execution_time.minute,
        hour=execution_time.hour,
        day=execution_day_of_month)

    partition_fn = date_partition_range(start_date,
                                        delta=relativedelta(months=1),
                                        fmt="%Y-%m")

    def inner(fn):
        check.callable_param(fn, 'fn')

        schedule_name = name or fn.__name__

        # Default to no tags when the caller did not supply a tag function.
        tags_fn_for_partition_value = lambda partition: {}
        if tags_fn_for_date:
            tags_fn_for_partition_value = lambda partition: tags_fn_for_date(
                partition.value)

        partition_set = PartitionSetDefinition(
            name='{}_monthly'.format(pipeline_name),
            pipeline_name=pipeline_name,
            partition_fn=partition_fn,
            environment_dict_fn_for_partition=lambda partition: fn(partition.
                                                                   value),
            tags_fn_for_partition=tags_fn_for_partition_value,
            mode=mode,
        )

        return partition_set.create_schedule_definition(
            schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
        )

    return inner
Exemple #10
0
 def __new__(cls, output_name, manager_key):
     """Create the instance from a string output name and string manager key."""
     fields = {
         "output_name": check.str_param(output_name, "output_name"),
         "manager_key": check.str_param(manager_key, "manager_key"),
     }
     return super(HandledOutputData, cls).__new__(cls, **fields)
Exemple #11
0
def hourly_schedule(
    pipeline_name,
    start_date,
    name=None,
    execution_time=datetime.time(0, 0),
    tags_fn_for_date=None,
    solid_subset=None,
    mode="default",
    should_execute=None,
    environment_vars=None,
):
    """Return a decorator that builds an hourly, date-partitioned schedule.

    The decorated function is called with each partition's value and its
    result is used as the environment dict for that partition. Only the
    minute of ``execution_time`` ends up in the cron expression; a non-zero
    hour triggers a warning when the decorator is applied.

    Args:
        pipeline_name: Name of the pipeline to schedule (string).
        start_date: ``datetime.datetime`` at which the hourly partition range
            starts.
        name: Optional schedule name; defaults to the decorated function's
            ``__name__``.
        execution_time: ``datetime.time`` whose minute is placed in the cron
            expression.
        tags_fn_for_date: Optional callable given a partition's value that
            returns the tags for that partition.
        solid_subset: Optional list of strings (or ``None``).
        mode: Pipeline mode passed to the partition set.
        should_execute: Optional callable forwarded to the schedule
            definition.
        environment_vars: Optional ``str -> str`` dict forwarded to the
            schedule definition.

    Returns:
        A decorator that returns the value of
        ``PartitionSetDefinition.create_schedule_definition``.
    """
    check.opt_str_param(name, 'name')
    check.inst_param(start_date, 'start_date', datetime.datetime)
    check.opt_callable_param(tags_fn_for_date, 'tags_fn_for_date')
    # NOTE(review): solid_subset is validated but not used anywhere below --
    # confirm whether it should be forwarded to the partition set.
    check.opt_nullable_list_param(solid_subset, 'solid_subset', of_type=str)
    mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)
    check.opt_callable_param(should_execute, 'should_execute')
    check.opt_dict_param(environment_vars,
                         'environment_vars',
                         key_type=str,
                         value_type=str)
    check.str_param(pipeline_name, 'pipeline_name')
    check.inst_param(execution_time, 'execution_time', datetime.time)

    cron_schedule = '{minute} * * * *'.format(minute=execution_time.minute)

    partition_fn = date_partition_range(start_date,
                                        delta=datetime.timedelta(hours=1),
                                        fmt="%Y-%m-%d-%H:%M")

    def inner(fn):
        check.callable_param(fn, 'fn')

        schedule_name = name or fn.__name__

        # Emitted here rather than in the outer function because the
        # schedule name may only be known once the decorated function is.
        if execution_time.hour != 0:
            warnings.warn(
                ("Hourly schedule {schedule_name} created with:\n"
                 "\tschedule_time=datetime.time(hour={hour}, minute={minute}, ...). "
                 "Since this is a hourly schedule, the hour parameter will be ignored and the schedule "
                 "will run on the {minute} mark for the previous hour interval. Replace "
                 "datetime.time(hour={hour}, minute={minute}, ...) with datetime.time(minute={minute}, ...) "
                 "to fix this warning.").format(
                     schedule_name=schedule_name,
                     hour=execution_time.hour,
                     minute=execution_time.minute,
                 ))

        # Default to no tags when the caller did not supply a tag function.
        tags_fn_for_partition_value = lambda partition: {}
        if tags_fn_for_date:
            tags_fn_for_partition_value = lambda partition: tags_fn_for_date(
                partition.value)

        partition_set = PartitionSetDefinition(
            name='{}_hourly'.format(pipeline_name),
            pipeline_name=pipeline_name,
            partition_fn=partition_fn,
            environment_dict_fn_for_partition=lambda partition: fn(partition.
                                                                   value),
            tags_fn_for_partition=tags_fn_for_partition_value,
            mode=mode,
        )

        return partition_set.create_schedule_definition(
            schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
        )

    return inner
Exemple #12
0
    def docker_service_up(docker_compose_file, service_name, conn_args=None):
        """Start a docker-compose service and yield a connection string.

        Generator that yields exactly once. When ``BUILDKITE`` is truthy no
        docker commands are run and the connection string is yielded
        directly. Otherwise any existing container for the service is stopped
        and removed, the service is started detached, the connection is
        awaited, the connection string is yielded, and the container is
        stopped and removed again afterwards.

        Args:
            docker_compose_file: Path to an existing docker-compose file.
            service_name: Name of the compose service to start.
            conn_args: Optional dict of keyword arguments for
                ``TestPostgresInstance.conn_string``.

        Raises:
            PostgresDockerError: ``docker-compose up`` exited with an error.
        """
        check.invariant(
            TestPostgresInstance.dagster_postgres_installed(),
            "dagster_postgres must be installed to test with postgres",
        )
        check.str_param(service_name, "service_name")
        check.str_param(docker_compose_file, "docker_compose_file")
        check.invariant(os.path.isfile(docker_compose_file),
                        "docker_compose_file must specify a valid file")
        conn_args = check.opt_dict_param(conn_args,
                                         "conn_args") if conn_args else {}

        from dagster_postgres.utils import wait_for_connection  # pylint: disable=import-error

        if BUILDKITE:
            yield TestPostgresInstance.conn_string(
                **conn_args)  # buildkite docker is handled in pipeline setup
            return

        compose_cmd = ["docker-compose", "-f", docker_compose_file]

        def _stop_and_remove():
            # Best-effort cleanup: a failing stop/rm (for example when no
            # container exists yet) is deliberately ignored.
            try:
                subprocess.check_output(compose_cmd + ["stop", service_name])
                subprocess.check_output(compose_cmd +
                                        ["rm", "-f", service_name])
            except subprocess.CalledProcessError:
                pass

        # Clear out any leftover container before starting a fresh one.
        _stop_and_remove()

        try:
            subprocess.check_output(
                compose_cmd + ["up", "-d", service_name],
                stderr=subprocess.STDOUT,  # capture STDERR for error handling
            )
        except subprocess.CalledProcessError as ex:
            err_text = ex.output.decode()
            raise PostgresDockerError(
                "Failed to launch docker container(s) via docker-compose: {}".
                format(err_text),
                ex,
            )

        conn_str = TestPostgresInstance.conn_string(**conn_args)
        wait_for_connection(conn_str, retry_limit=10, retry_wait=3)
        try:
            yield conn_str
        finally:
            # Runs even when the consumer raises or the generator is closed
            # early, so the container is not left behind.
            # NOTE(review): presumably this generator is wrapped with
            # contextlib.contextmanager outside this view -- confirm.
            _stop_and_remove()
Exemple #13
0
 def __new__(cls, config_type_snap, value_rep):
     """Create the instance from a config type snapshot and a value repr.

     ``config_type_snap`` must be a ``ConfigTypeSnap`` and ``value_rep`` a
     string; both are passed positionally to the parent ``__new__``.
     """
     # The label matches the actual parameter name so a failed check points
     # the caller at the right argument.
     check.inst_param(config_type_snap, "config_type_snap", ConfigTypeSnap)
     return super(RuntimeMismatchErrorData, cls).__new__(
         cls, config_type_snap, check.str_param(value_rep, "value_rep"),
     )
Exemple #14
0
 def __new__(cls, field_name, field_snap):
     """Create the instance from a string field name and a ``ConfigFieldSnap``."""
     checked_name = check.str_param(field_name, "field_name")
     checked_snap = check.inst_param(field_snap, "field_snap", ConfigFieldSnap)
     return super(MissingFieldErrorData, cls).__new__(cls, checked_name, checked_snap)
Exemple #15
0
 def get_invocation(self, solid_name):
     """Return the entry of ``self._invocations_dict`` for ``solid_name``.

     ``solid_name`` must be a string; an unknown name surfaces the mapping's
     own lookup error.
     """
     check.str_param(solid_name, "solid_name")
     invocation = self._invocations_dict[solid_name]
     return invocation
 def __new__(cls, step_key, output_name='result'):
     """Create the handle from a string step key and a string output name.

     ``output_name`` defaults to ``'result'``.
     """
     checked_step_key = check.str_param(step_key, "step_key")
     checked_output_name = check.str_param(output_name, "output_name")
     return super(StepOutputHandle, cls).__new__(
         cls, step_key=checked_step_key, output_name=checked_output_name
     )
Exemple #17
0
 def get_downstream_inputs(self, solid_name, output_name):
     """Look up ``self._output_to_upstream_index[solid_name][output_name]``.

     Both arguments must be strings; a missing solid or output name surfaces
     the mapping's own lookup error.
     """
     check.str_param(solid_name, "solid_name")
     check.str_param(output_name, "output_name")
     outputs_for_solid = self._output_to_upstream_index[solid_name]
     return outputs_for_solid[output_name]
Exemple #18
0
 def __init__(self, *args, **kwargs):
     """Pull the required ``invalid_run_id`` keyword out of ``kwargs``.

     The value must be a string. It is removed from ``kwargs`` before the
     remaining arguments are handed to the parent initializer.
     """
     invalid_run_id = kwargs.pop("invalid_run_id")
     self.invalid_run_id = check.str_param(invalid_run_id, "invalid_run_id")
     super(DagsterRunNotFoundError, self).__init__(*args, **kwargs)
Exemple #19
0
def validate_solid_fn(
    decorator_name: str,
    fn_name: str,
    compute_fn: Callable[..., Any],
    input_defs: List[InputDefinition],
    expected_positionals: Optional[List[str]] = None,
    exclude_nothing: Optional[bool] = True,
) -> List[str]:
    """Check that a decorated function's parameters match its input defs.

    Args:
        decorator_name: Name of the decorator, used in error messages.
        fn_name: Name of the decorated solid, used in error messages.
        compute_fn: The decorated callable to inspect.
        input_defs: Input definitions declared for the solid.
        expected_positionals: Names the function's leading positional
            parameters must have.
        exclude_nothing: When truthy, inputs whose dagster type is of kind
            ``NOTHING`` are not expected as function parameters.

    Returns:
        The result of ``positional_arg_name_list`` for the function's input
        arguments.

    Raises:
        DagsterInvalidDefinitionError: a required positional parameter is
            missing, the function takes a vararg, the function has a
            parameter that is not a (non-Nothing) input, or an input has no
            matching parameter.
    """
    check.str_param(decorator_name, "decorator_name")
    check.str_param(fn_name, "fn_name")
    check.callable_param(compute_fn, "compute_fn")
    check.list_param(input_defs, "input_defs", of_type=InputDefinition)
    expected_positionals = check.opt_list_param(expected_positionals,
                                                "expected_positionals",
                                                of_type=str)
    if exclude_nothing:
        names = {
            inp.name for inp in input_defs
            if not inp.dagster_type.kind == DagsterTypeKind.NOTHING
        }
        # Tracked separately so a parameter named after a Nothing input gets
        # a dedicated error message below.
        nothing_names = {
            inp.name for inp in input_defs
            if inp.dagster_type.kind == DagsterTypeKind.NOTHING
        }
    else:
        names = {inp.name for inp in input_defs}
        nothing_names = set()

    # Currently being super strict about naming. Might be a good idea to relax. Starting strict.
    fn_positionals, input_args = split_function_parameters(
        compute_fn, expected_positionals)

    # Validate Positional Parameters
    missing_positional = validate_decorated_fn_positionals(
        fn_positionals, expected_positionals)
    if missing_positional:
        raise DagsterInvalidDefinitionError(
            "{decorator_name} '{solid_name}' decorated function does not have required positional "
            "parameter '{missing_param}'. Solid functions should only have keyword arguments "
            "that match input names and a first positional parameter named 'context'."
            .format(decorator_name=decorator_name,
                    solid_name=fn_name,
                    missing_param=missing_positional))

    # Validate non positional parameters
    invalid_function_info = validate_decorated_fn_input_args(names, input_args)
    if invalid_function_info:
        if invalid_function_info.error_type == InvalidDecoratedFunctionInfo.TYPES[
                "vararg"]:
            raise DagsterInvalidDefinitionError(
                "{decorator_name} '{solid_name}' decorated function has positional vararg parameter "
                "'{param}'. Solid functions should only have keyword arguments that match "
                "input names and a first positional parameter named 'context'."
                .format(
                    decorator_name=decorator_name,
                    solid_name=fn_name,
                    param=invalid_function_info.param,
                ))
        elif invalid_function_info.error_type == InvalidDecoratedFunctionInfo.TYPES[
                "missing_name"]:
            if invalid_function_info.param in nothing_names:
                raise DagsterInvalidDefinitionError(
                    "{decorator_name} '{solid_name}' decorated function has parameter '{param}' that is "
                    "one of the solid input_defs of type 'Nothing' which should not be included since "
                    "no data will be passed for it. ".format(
                        decorator_name=decorator_name,
                        solid_name=fn_name,
                        param=invalid_function_info.param,
                    ))
            else:
                raise DagsterInvalidDefinitionError(
                    "{decorator_name} '{solid_name}' decorated function has parameter '{param}' that is not "
                    "one of the solid input_defs. Solid functions should only have keyword arguments "
                    "that match input names and a first positional parameter named 'context'."
                    .format(
                        decorator_name=decorator_name,
                        solid_name=fn_name,
                        param=invalid_function_info.param,
                    ))
        elif invalid_function_info.error_type == InvalidDecoratedFunctionInfo.TYPES[
                "extra"]:
            # The template wraps the joined text in single quotes, so the
            # separator has to close and reopen them to yield 'a', 'b'.
            undeclared_inputs_printed = "', '".join(
                invalid_function_info.missing_names)
            raise DagsterInvalidDefinitionError(
                "{decorator_name} '{solid_name}' decorated function does not have parameter(s) "
                "'{undeclared_inputs_printed}', which are in solid's input_defs. Solid functions "
                "should only have keyword arguments that match input names and a first positional "
                "parameter named 'context'.".format(
                    decorator_name=decorator_name,
                    solid_name=fn_name,
                    undeclared_inputs_printed=undeclared_inputs_printed,
                ))

    return positional_arg_name_list(input_args)
Exemple #20
0
 def __init__(self, *args, **kwargs):
     """Pull the required ``step_key`` and ``output_name`` keywords out of ``kwargs``.

     Each value must be a string. Both are removed from ``kwargs`` before the
     remaining arguments are handed to the parent initializer.
     """
     step_key = kwargs.pop("step_key")
     self.step_key = check.str_param(step_key, "step_key")
     output_name = kwargs.pop("output_name")
     self.output_name = check.str_param(output_name, "output_name")
     super(DagsterStepOutputNotFoundError, self).__init__(*args, **kwargs)
Exemple #21
0
 def has_step(self, key):
     """Report whether ``key`` (a string) is present in ``self.step_dict``."""
     check.str_param(key, "key")
     is_known = key in self.step_dict
     return is_known
Exemple #22
0
 def __init__(self, *args, **kwargs):
     """Pull the required ``step_key`` and ``input_name`` keywords out of ``kwargs``.

     Each value must be a string. Both are removed from ``kwargs`` before the
     remaining arguments are handed to the parent initializer.
     """
     # Processed one at a time, in this order: pop, validate, store.
     for field in ("step_key", "input_name"):
         setattr(self, field, check.str_param(kwargs.pop(field), field))
     super(DagsterExecutionLoadInputError, self).__init__(*args, **kwargs)
Exemple #23
0
 def __init__(self, message):
     """Store the error message, which must be a string."""
     super(DauphinInvalidPipelineRunsFilterError, self).__init__()
     checked_message = check.str_param(message, "message")
     self.message = checked_message
Exemple #24
0
 def __init__(self, *args, **kwargs):
     """Pull the required ``step_key`` and ``output_name`` keywords out of ``kwargs``.

     Each value must be a string. Both are removed from ``kwargs`` before the
     remaining arguments are handed to the parent initializer.
     """
     # Processed one at a time, in this order: pop, validate, store.
     for field in ("step_key", "output_name"):
         setattr(self, field, check.str_param(kwargs.pop(field), field))
     super(DagsterExecutionHandleOutputError, self).__init__(*args, **kwargs)
Exemple #25
0
 def __init__(self, preset, selector):
     """Store the requested preset and compose the "not found" message.

     ``preset`` must be a string; ``selector.name`` is used as the pipeline
     name in the message.
     """
     self.preset = check.str_param(preset, "preset")
     template = 'Preset {preset} not found in pipeline {pipeline}.'
     self.message = template.format(preset=preset, pipeline=selector.name)
Exemple #26
0
 def __init__(self, *args, **kwargs):
     """Pull the step and solid identifiers out of ``kwargs``.

     ``step_key``, ``solid_name`` and ``solid_def_name`` are required keyword
     arguments and must be strings. They are removed from ``kwargs`` before
     the remaining arguments are handed to the parent initializer.
     """
     # Processed one at a time, in this order: pop, validate, store.
     for field in ("step_key", "solid_name", "solid_def_name"):
         setattr(self, field, check.str_param(kwargs.pop(field), field))
     super(DagsterExecutionStepExecutionError, self).__init__(*args, **kwargs)
Exemple #27
0
 def __init__(self, field_name):
     """Store the field name (a string) before running the parent initializer."""
     checked_field_name = check.str_param(field_name, "field_name")
     self._field_name = checked_field_name
     super(DauphinEvaluationStackPathEntry, self).__init__()
Exemple #28
0
 def __init__(self, run_id):
     """Store the run id (a string) and pass a message naming it to the parent."""
     self.run_id = check.str_param(run_id, "run_id")
     message = "Event logs invalid for run id {}".format(run_id)
     super(DagsterEventLogInvalidForRun, self).__init__(message)
Exemple #29
0
 def __init__(self, schedule_name):
     """Store the schedule name (a string) and the matching message."""
     super(DauphinScheduleNotFoundError, self).__init__()
     self.schedule_name = check.str_param(schedule_name, "schedule_name")
     template = 'Schedule {schedule_name} is not present in the currently loaded repository.'
     self.message = template.format(schedule_name=schedule_name)
Exemple #30
0
 def __new__(cls, location_name, repository_name):
     """Create the selector from a string location name and repository name."""
     checked_location = check.str_param(location_name, "location_name")
     checked_repository = check.str_param(repository_name, "repository_name")
     return super(RepositorySelector, cls).__new__(
         cls, location_name=checked_location, repository_name=checked_repository
     )