Example #1
def create_grpc_watch_thread(
    client,
    on_disconnect=None,
    on_reconnected=None,
    on_updated=None,
    on_error=None,
    watch_interval=None,
    max_reconnect_attempts=None,
):
    check.inst_param(client, "client", DagsterGrpcClient)

    noop = lambda *a: None
    on_disconnect = check.opt_callable_param(on_disconnect, "on_disconnect", noop)
    on_reconnected = check.opt_callable_param(on_reconnected, "on_reconnected", noop)
    on_updated = check.opt_callable_param(on_updated, "on_updated", noop)
    on_error = check.opt_callable_param(on_error, "on_error", noop)

    shutdown_event = threading.Event()
    thread = threading.Thread(
        target=watch_grpc_server_thread,
        args=[
            client,
            on_disconnect,
            on_reconnected,
            on_updated,
            on_error,
            shutdown_event,
            watch_interval,
            max_reconnect_attempts,
        ],
    )
    thread.daemon = True  # daemonize so the watch thread never blocks interpreter exit
    return shutdown_event, thread
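A minimal usage sketch, assuming client is an already-constructed DagsterGrpcClient: the caller starts the returned daemon thread, then sets the event to signal the watch loop to exit.

    shutdown_event, watch_thread = create_grpc_watch_thread(client)
    watch_thread.start()
    # ... at teardown, stop watching and wait for the thread to finish
    shutdown_event.set()
    watch_thread.join()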
Example #2
    def execute_queries(
        self, queries, fetch_results=False, cursor_factory=None, error_callback=None
    ):
        '''Fake for execute_queries; returns [self.QUERY_RESULT] * 3

        Args:
            queries (List[str]): The queries to execute.
            fetch_results (Optional[bool]): Whether to return the results of executing the query.
                Defaults to False, in which case the query will be executed without retrieving the
                results.
            cursor_factory (Optional[:py:class:`psycopg2.extensions.cursor`]): An alternative
                cursor_factory; defaults to None. Will be used when constructing the cursor.
            error_callback (Optional[Callable[[Exception, Cursor, DagsterLogManager], None]]): A
                callback function, invoked when an exception is encountered during query execution;
                this is intended to support executing additional queries to provide diagnostic
                information, e.g. by querying ``stl_load_errors`` using ``pg_last_copy_id()``. If no
                function is provided, exceptions during query execution will be raised directly.

        Returns:
            Optional[List[List[Tuple[Any, ...]]]]: Results of the query, as a list of list of
                tuples, when fetch_results is set. Otherwise return None.
        '''
        check.list_param(queries, 'queries', of_type=str)
        check.bool_param(fetch_results, 'fetch_results')
        check.opt_subclass_param(cursor_factory, 'cursor_factory', psycopg2.extensions.cursor)
        check.opt_callable_param(error_callback, 'error_callback')

        for query in queries:
            self.log.info('Executing query \'{query}\''.format(query=query))
        if fetch_results:
            return [self.QUERY_RESULT] * 3
Example #3
def load_python_module(module_name, working_directory, remove_from_path_fn=None):
    check.str_param(module_name, "module_name")
    check.opt_str_param(working_directory, "working_directory")
    check.opt_callable_param(remove_from_path_fn, "remove_from_path_fn")

    # Use the passed in working directory for local imports (sys.path[0] isn't
    # consistently set in the different entry points that Dagster uses to import code)
    remove_paths = remove_from_path_fn() if remove_from_path_fn else []  # hook for tests
    remove_paths.insert(0, sys.path[0])  # remove the script path

    with alter_sys_path(
        to_add=([working_directory] if working_directory else []), to_remove=remove_paths
    ):
        try:
            return importlib.import_module(module_name)
        except ImportError as ie:
            msg = get_import_error_message(ie)
            if working_directory:
                abs_working_directory = os.path.abspath(os.path.expanduser(working_directory))
                raise DagsterImportError(
                    f"Encountered ImportError: `{msg}` while importing module {module_name}. "
                    f"Local modules were resolved using the working "
                    f"directory `{abs_working_directory}`. If another working directory should be "
                    "used, please explicitly specify the appropriate path using the `-d` or "
                    "`--working-directory` for CLI based targets or the `working_directory` "
                    "configuration option for workspace targets. "
                ) from ie
            else:
                raise DagsterImportError(
                    f"Encountered ImportError: `{msg}` while importing module {module_name}. "
                    f"If relying on the working directory to resolve modules, please "
                    "explicitly specify the appropriate path using the `-d` or "
                    "`--working-directory` for CLI based targets or the `working_directory` "
                    "configuration option for workspace targets. "
                ) from ie
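A minimal usage sketch for the function above; the module name and project path are placeholders:

    module = load_python_module("my_repo_module", working_directory="/path/to/project")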
Example #4
    def __init__(
        self,
        resource_fn=None,
        config_schema=None,
        description=None,
        _configured_config_mapping_fn=None,
        version=None,
    ):
        EXPECTED_POSITIONALS = ["*"]
        fn_positionals, _ = split_function_parameters(resource_fn, EXPECTED_POSITIONALS)
        missing_positional = validate_decorated_fn_positionals(fn_positionals, EXPECTED_POSITIONALS)

        if missing_positional:
            raise DagsterInvalidDefinitionError(
                "@resource '{resource_name}' decorated function does not have required "
                "positional parameter '{missing_param}'. Resource functions should only have keyword "
                "arguments that match input names and a first positional parameter.".format(
                    resource_name=resource_fn.__name__, missing_param=missing_positional
                )
            )

        self._resource_fn = check.opt_callable_param(resource_fn, "resource_fn")
        self._config_schema = check_user_facing_opt_config_param(config_schema, "config_schema")
        self._description = check.opt_str_param(description, "description")
        self.__configured_config_mapping_fn = check.opt_callable_param(
            _configured_config_mapping_fn, "config_mapping_fn"
        )
        self._version = check.opt_str_param(version, "version")
        if version:
            experimental_arg_warning("version", "ResourceDefinition.__init__")
Example #5
 def __init__(
     self,
     name,
     is_persistent,
     required_resource_keys,
     config_schema=None,
     intermediate_storage_creation_fn=None,
     _configured_config_mapping_fn=None,
 ):
     self._name = check_valid_name(name)
     self._is_persistent = check.bool_param(is_persistent, "is_persistent")
     self._config_schema = check_user_facing_opt_config_param(config_schema, "config_schema")
     self._intermediate_storage_creation_fn = check.opt_callable_param(
         intermediate_storage_creation_fn, "intermediate_storage_creation_fn"
     )
     self._required_resource_keys = frozenset(
         check.set_param(
             required_resource_keys if required_resource_keys else set(),
             "required_resource_keys",
             of_type=str,
         )
     )
     self.__configured_config_mapping_fn = check.opt_callable_param(
         _configured_config_mapping_fn, "config_mapping_fn"
     )
Example #6
 def __new__(
     cls,
     solid_handle: SolidHandle,
     name: str,
     dagster_type_key: str,
     is_required: bool,
     is_dynamic: bool,
     is_asset: bool = False,
     get_asset_key: Optional[Callable] = None,
     get_asset_partitions: Optional[Callable] = None,
     should_materialize: Optional[bool] = None,
 ):
     return super(StepOutput, cls).__new__(
         cls,
         solid_handle=check.inst_param(solid_handle, "solid_handle",
                                       SolidHandle),
         name=check.str_param(name, "name"),
         dagster_type_key=check.str_param(dagster_type_key,
                                          "dagster_type_key"),
         is_required=check.bool_param(is_required, "is_required"),
         is_dynamic=check.bool_param(is_dynamic, "is_dynamic"),
         is_asset=check.bool_param(is_asset, "is_asset"),
         get_asset_key=check.opt_callable_param(get_asset_key,
                                                "get_asset_key"),
         get_asset_partitions=check.opt_callable_param(
             get_asset_partitions, "get_asset_partitions"),
         should_materialize=check.opt_bool_param(should_materialize,
                                                 "should_materialize"),
     )
Example #7
def canonicalize_backcompat_args(new_val,
                                 new_arg,
                                 old_val,
                                 old_arg,
                                 coerce_old_to_new=None,
                                 additional_warn_txt=None):
    '''
    Utility for managing backwards compatibility of two related arguments.

    For example, suppose you had an existing function

    def is_new(old_flag):
        return not old_flag

    and you decided you wanted the new function to be:

    def is_new(new_flag):
        return new_flag

    but you need a transition period during which either flag is accepted. Use
    canonicalize_backcompat_args to manage that:

    def is_new(old_flag=None, new_flag=None):
        return canonicalize_backcompat_args(
            new_val=new_flag,
            new_arg='new_flag',
            old_val=old_flag,
            old_arg='old_flag',
            coerce_old_to_new=lambda val: not val,
        )

    In this example, if the caller sets both new_flag and old_flag, the call fails with
    a CheckError. If the caller sets only old_flag, a deprecation warning is emitted and
    the value is run through the coercion function before being returned.

    canonicalize_backcompat_args returns the value as if *only* new_val had been specified.
    '''
    check.str_param(new_arg, 'new_arg')
    check.str_param(old_arg, 'old_arg')
    check.opt_callable_param(coerce_old_to_new, 'coerce_old_to_new')
    check.opt_str_param(additional_warn_txt, 'additional_warn_txt')
    if new_val is not None:
        if old_val is not None:
            check.failed(
                'Do not use deprecated "{old_arg}" now that you are using "{new_arg}".'
                .format(old_arg=old_arg, new_arg=new_arg))
        return new_val
    if old_val is not None:
        warnings.warn(
            '"{old_arg}" is deprecated, use "{new_arg}" instead.'.format(
                old_arg=old_arg, new_arg=new_arg)
            # parentheses matter: without them the ternary swallows the whole
            # message whenever additional_warn_txt is None
            + ((' ' + additional_warn_txt) if additional_warn_txt else ''),
            # This punches up to the caller of canonicalize_backcompat_args
            stacklevel=3,
        )
        return coerce_old_to_new(old_val) if coerce_old_to_new else old_val

    return new_val
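Taking the is_new definition from the docstring above, the three call paths behave as follows (a sketch; CheckError is what check.failed raises):

    is_new(new_flag=True)                   # returns True, no warning
    is_new(old_flag=True)                   # warns, returns (not True) == False
    is_new(old_flag=True, new_flag=True)    # raises CheckError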
Example #8
def daily_schedule(
    pipeline_name,
    start_date,
    name=None,
    execution_time=datetime.time(0, 0),
    tags=None,
    tags_fn_for_date=None,
    solid_subset=None,
    mode="default",
    should_execute=None,
    environment_vars=None,
):

    from dagster.core.definitions.partition import PartitionSetDefinition

    check.opt_str_param(name, 'name')
    check.str_param(pipeline_name, 'pipeline_name')
    check.inst_param(start_date, 'start_date', datetime.datetime)
    check.inst_param(execution_time, 'execution_time', datetime.time)
    check.opt_dict_param(tags, 'tags', key_type=str, value_type=str)
    check.opt_callable_param(tags_fn_for_date, 'tags_fn_for_date')
    check.opt_nullable_list_param(solid_subset, 'solid_subset', of_type=str)
    mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)
    check.opt_callable_param(should_execute, 'should_execute')
    check.opt_dict_param(environment_vars,
                         'environment_vars',
                         key_type=str,
                         value_type=str)

    cron_schedule = '{minute} {hour} * * *'.format(
        minute=execution_time.minute, hour=execution_time.hour)

    def inner(fn):
        check.callable_param(fn, 'fn')

        schedule_name = name or fn.__name__

        def _environment_dict_fn_for_partition(partition):
            return fn(partition.value)

        partition_set_name = '{}_daily'.format(pipeline_name)
        partition_set = PartitionSetDefinition(
            name=partition_set_name,
            pipeline_name=pipeline_name,
            partition_fn=date_partition_range(start_date),
            environment_dict_fn_for_partition=
            _environment_dict_fn_for_partition,
            mode=mode,
        )

        return partition_set.create_schedule_definition(
            schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
        )

    return inner
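A hypothetical usage of the decorator above; the pipeline name and config shape are illustrative. The decorated function receives each partition's date and returns the environment dict for that day's run:

    @daily_schedule(
        pipeline_name='my_pipeline',
        start_date=datetime.datetime(2020, 1, 1),
        execution_time=datetime.time(6, 30),
    )
    def my_daily_schedule(date):
        return {'solids': {'process': {'config': {'date': date.strftime('%Y-%m-%d')}}}}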
Example #9
    def execute_query(self, query, fetch_results=False, cursor_factory=None, error_callback=None):
        '''Synchronously execute a single query against Redshift. Will return a list of rows, where
        each row is a tuple of values, e.g. SELECT 1 will return [(1,)].

        Args:
            query (str): The query to execute.
            fetch_results (Optional[bool]): Whether to return the results of executing the query.
                Defaults to False, in which case the query will be executed without retrieving the
                results.
            cursor_factory (Optional[:py:class:`psycopg2.extensions.cursor`]): An alternative
                cursor_factory; defaults to None. Will be used when constructing the cursor.
            error_callback (Optional[Callable[[Exception, Cursor, DagsterLogManager], None]]): A
                callback function, invoked when an exception is encountered during query execution;
                this is intended to support executing additional queries to provide diagnostic
                information, e.g. by querying ``stl_load_errors`` using ``pg_last_copy_id()``. If no
                function is provided, exceptions during query execution will be raised directly.

        Returns:
            Optional[List[Tuple[Any, ...]]]: Results of the query, as a list of tuples, when
                fetch_results is set. Otherwise return None.
        '''
        check.str_param(query, 'query')
        check.bool_param(fetch_results, 'fetch_results')
        check.opt_subclass_param(cursor_factory, 'cursor_factory', psycopg2.extensions.cursor)
        check.opt_callable_param(error_callback, 'error_callback')

        with self._get_conn() as conn:
            with self._get_cursor(conn, cursor_factory=cursor_factory) as cursor:
                try:
                    six.ensure_str(query)  # validates query is str/bytes; return value intentionally unused

                    self.log.info('Executing query \'{query}\''.format(query=query))
                    cursor.execute(query)

                    if fetch_results and cursor.rowcount > 0:
                        return cursor.fetchall()
                    else:
                        self.log.info('Empty result from query')

                except Exception as e:  # pylint: disable=broad-except
                    # If autocommit is disabled or not set (it is disabled by default), Redshift
                    # will be in the middle of a transaction at exception time, and because of
                    # the failure the current transaction will not accept any further queries.
                    #
                    # This conn.commit() call closes the open transaction before handing off
                    # control to the error callback, so that the user can issue additional
                    # queries. Notably, for e.g. pg_last_copy_id() to work, it requires you to
                    # use the same conn/cursor, so you have to do this conn.commit() to ensure
                    # things are in a usable state in the error callback.
                    if not self.autocommit:
                        conn.commit()

                    if error_callback is not None:
                        error_callback(e, cursor, self.log)
                    else:
                        raise
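An illustrative error_callback matching the documented signature Callable[[Exception, Cursor, DagsterLogManager], None]; it issues the stl_load_errors diagnostic query mentioned in the docstring. Here redshift is assumed to be an instance of this resource class, and the COPY statement is a placeholder:

    def diagnose(error, cursor, log):
        log.error('Query failed: {error}'.format(error=error))
        # the conn.commit() in the except block above leaves the cursor usable here
        cursor.execute('SELECT * FROM stl_load_errors WHERE query = pg_last_copy_id()')
        for row in cursor.fetchall():
            log.error(str(row))

    redshift.execute_query("COPY events FROM 's3://my-bucket/events.csv' CSV", error_callback=diagnose)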
Example #10
def triggered_execution(
    pipeline_name,
    name=None,
    mode="default",
    solid_selection=None,
    tags_fn=None,
    should_execute_fn=None,
):
    """
    The decorated function will be called as the ``run_config_fn`` of the underlying
    :py:class:`~dagster.TriggeredDefinition` and should take a
    :py:class:`~dagster.TriggeredExecutionContext` as its only argument, returning the environment
    dict for the triggered execution.

    Args:
        pipeline_name (str): The name of the pipeline to execute when the trigger fires.
        name (Optional[str]): The name of this triggered execution.
        solid_selection (Optional[List[str]]): A list of solid subselection (including single
            solid names) to execute when the trigger fires. e.g. ``['*some_solid+', 'other_solid']``
        mode (Optional[str]): The pipeline mode to apply for the triggered execution
            (Default: 'default')
        tags_fn (Optional[Callable[[TriggeredExecutionContext], Optional[Dict[str, str]]]]): A
            function that generates tags to attach to the triggered execution. Takes a
            :py:class:`~dagster.TriggeredExecutionContext` and returns a dictionary of tags (string
            key-value pairs).
        should_execute_fn (Optional[Callable[[TriggeredExecutionContext], bool]]): A function that
            runs at trigger time to determine whether a pipeline execution should be initiated or
            skipped. Takes a :py:class:`~dagster.TriggeredExecutionContext` and returns a boolean
            (``True`` if a pipeline run should be executed). Defaults to a function that always
            returns ``True``.
    """

    check.str_param(pipeline_name, "pipeline_name")
    check.opt_str_param(name, "name")
    check.str_param(mode, "mode")
    check.opt_nullable_list_param(solid_selection,
                                  "solid_selection",
                                  of_type=str)
    check.opt_callable_param(tags_fn, "tags_fn")
    check.opt_callable_param(should_execute_fn, "should_execute_fn")

    def inner(fn):
        check.callable_param(fn, "fn")
        trigger_name = name or fn.__name__

        return TriggeredExecutionDefinition(
            name=trigger_name,
            pipeline_name=pipeline_name,
            run_config_fn=fn,
            tags_fn=tags_fn,
            should_execute_fn=should_execute_fn,
            mode=mode,
            solid_selection=solid_selection,
        )

    return inner
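A hypothetical usage: per the docstring, the decorated function becomes the run_config_fn and receives a TriggeredExecutionContext. The pipeline name and config are illustrative:

    @triggered_execution(pipeline_name="my_pipeline")
    def my_trigger(context):
        return {"solids": {"ingest": {"config": {"path": "/tmp/data"}}}}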
Example #11
def execute_run_host_mode(
    pipeline: ReconstructablePipeline,
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    get_executor_def_fn: Optional[Callable[[Optional[str]], ExecutorDefinition]] = None,
    raise_on_error: bool = False,
):
    check.inst_param(pipeline, "pipeline", ReconstructablePipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)
    check.opt_callable_param(get_executor_def_fn, "get_executor_def_fn")

    if pipeline_run.status == PipelineRunStatus.CANCELED:
        message = "Not starting execution since the run was canceled before execution could start"
        instance.report_engine_event(
            message,
            pipeline_run,
        )
        raise DagsterInvariantViolationError(message)

    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED
        or pipeline_run.status == PipelineRunStatus.STARTING,
        desc="Pipeline run {} ({}) in state {}, expected NOT_STARTED or STARTING"
        .format(pipeline_run.pipeline_name, pipeline_run.run_id,
                pipeline_run.status),
    )

    if pipeline_run.solids_to_execute:
        pipeline = pipeline.subset_for_execution_from_existing_pipeline(
            pipeline_run.solids_to_execute)

    execution_plan_snapshot = instance.get_execution_plan_snapshot(
        pipeline_run.execution_plan_snapshot_id)
    execution_plan = ExecutionPlan.rebuild_from_snapshot(
        pipeline_run.pipeline_name,
        execution_plan_snapshot,
    )

    _execute_run_iterable = ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=pipeline_execution_iterator,
        execution_context_manager=PlanOrchestrationContextManager(
            context_event_generator=host_mode_execution_context_event_generator,
            pipeline=pipeline,
            execution_plan=execution_plan,
            run_config=pipeline_run.run_config,
            pipeline_run=pipeline_run,
            instance=instance,
            raise_on_error=raise_on_error,
            get_executor_def_fn=get_executor_def_fn,
            output_capture=None,
        ),
    )
    event_list = list(_execute_run_iterable)
    return event_list
Example #12
    def __init__(
        self,
        name,
        cron_schedule,
        pipeline_name,
        run_config=None,
        run_config_fn=None,
        tags=None,
        tags_fn=None,
        solid_selection=None,
        mode="default",
        should_execute=None,
        environment_vars=None,
    ):

        self._name = check.str_param(name, 'name')
        self._cron_schedule = check.str_param(cron_schedule, 'cron_schedule')
        self._pipeline_name = check.str_param(pipeline_name, 'pipeline_name')
        self._run_config = check.opt_dict_param(run_config, 'run_config')
        self._tags = check.opt_dict_param(tags,
                                          'tags',
                                          key_type=str,
                                          value_type=str)

        check.opt_callable_param(tags_fn, 'tags_fn')
        self._solid_selection = check.opt_nullable_list_param(
            solid_selection, 'solid_selection', of_type=str)
        self._mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)
        check.opt_callable_param(should_execute, 'should_execute')
        self._environment_vars = check.opt_dict_param(environment_vars,
                                                      'environment_vars',
                                                      key_type=str,
                                                      value_type=str)

        if run_config_fn and run_config:
            raise DagsterInvalidDefinitionError(
                'Attempted to provide both run_config_fn and run_config as arguments'
                ' to ScheduleDefinition. Must provide only one of the two.')

        if not run_config and not run_config_fn:
            run_config_fn = lambda _context: {}
        self._run_config_fn = run_config_fn

        if tags_fn and tags:
            raise DagsterInvalidDefinitionError(
                'Attempted to provide both tags_fn and tags as arguments'
                ' to ScheduleDefinition. Must provide only one of the two.')

        if not tags and not tags_fn:
            tags_fn = lambda _context: {}
        self._tags_fn = tags_fn

        if not should_execute:
            should_execute = lambda _context: True
        self._should_execute = should_execute
Example #13
    def __init__(
        self,
        name,
        cron_schedule,
        pipeline_name,
        run_config=None,
        run_config_fn=None,
        tags=None,
        tags_fn=None,
        solid_selection=None,
        mode="default",
        should_execute=None,
        environment_vars=None,
    ):

        self._name = check_for_invalid_name_and_warn(name)
        self._cron_schedule = check.str_param(cron_schedule, "cron_schedule")
        self._pipeline_name = check.str_param(pipeline_name, "pipeline_name")
        self._run_config = check.opt_dict_param(run_config, "run_config")
        self._tags = check.opt_dict_param(tags,
                                          "tags",
                                          key_type=str,
                                          value_type=str)

        check.opt_callable_param(tags_fn, "tags_fn")
        self._solid_selection = check.opt_nullable_list_param(
            solid_selection, "solid_selection", of_type=str)
        self._mode = check.opt_str_param(mode, "mode", DEFAULT_MODE_NAME)
        check.opt_callable_param(should_execute, "should_execute")
        self._environment_vars = check.opt_dict_param(environment_vars,
                                                      "environment_vars",
                                                      key_type=str,
                                                      value_type=str)

        if run_config_fn and run_config:
            raise DagsterInvalidDefinitionError(
                "Attempted to provide both run_config_fn and run_config as arguments"
                " to ScheduleDefinition. Must provide only one of the two.")

        if not run_config and not run_config_fn:
            run_config_fn = lambda _context: {}
        self._run_config_fn = run_config_fn

        if tags_fn and tags:
            raise DagsterInvalidDefinitionError(
                "Attempted to provide both tags_fn and tags as arguments"
                " to ScheduleDefinition. Must provide only one of the two.")

        if not tags and not tags_fn:
            tags_fn = lambda _context: {}
        self._tags_fn = tags_fn

        if not should_execute:
            should_execute = lambda _context: True
        self._should_execute = should_execute
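A sketch of constructing the definition above; names are illustrative. Note that run_config/run_config_fn, like tags/tags_fn, are mutually exclusive, and passing both members of a pair raises DagsterInvalidDefinitionError:

    nightly = ScheduleDefinition(
        name="nightly",
        cron_schedule="0 0 * * *",
        pipeline_name="my_pipeline",
        run_config_fn=lambda context: {"solids": {}},
        tags={"team": "data"},
    )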
Example #14
def load_python_module(module_name, warn_only=False, remove_from_path_fn=None):
    check.str_param(module_name, "module_name")
    check.bool_param(warn_only, "warn_only")
    check.opt_callable_param(remove_from_path_fn, "remove_from_path_fn")

    error = None

    remove_paths = remove_from_path_fn() if remove_from_path_fn else []  # hook for tests
    remove_paths.insert(0, sys.path[0])  # remove the working directory

    with alter_sys_path(to_add=[], to_remove=remove_paths):
        try:
            module = importlib.import_module(module_name)
        except ImportError as ie:
            error = ie

    if error:
        try:
            module = importlib.import_module(module_name)

            # if here, we were able to resolve the module with the working directory on the path,
            # but should error because we may not always invoke from the same directory (e.g. from
            # cron)
            if warn_only:
                warnings.warn(
                    (
                        "Module {module} was resolved using the working directory. The ability to "
                        "load uninstalled modules from the working directory is deprecated and "
                        "will be removed in a future release.  Please use the python-file based "
                        "load arguments or install {module} to your python environment."
                    ).format(module=module_name)
                )
            else:
                six.raise_from(
                    DagsterInvariantViolationError(
                        (
                            "Module {module} not found. Packages must be installed rather than "
                            "relying on the working directory to resolve module loading."
                        ).format(module=module_name)
                    ),
                    error,
                )
        except RuntimeError:
            # We might be here because numpy throws run time errors at import time when being
            # imported multiple times, just raise the original import error
            raise error
        except ImportError:
            # re-raise the original import error rather than the error from the retry
            raise error

    return module
Example #15
 def __init__(self,
              python_type=None,
              key=None,
              name=None,
              typecheck_metadata_fn=None,
              type_check=None,
              **kwargs):
     name = check.opt_str_param(name, 'name', type(self).__name__)
     key = check.opt_str_param(key, 'key', name)
     super(PythonObjectType, self).__init__(key=key, name=name, **kwargs)
     self.python_type = check.type_param(python_type, 'python_type')
     self.typecheck_metadata_fn = check.opt_callable_param(
         typecheck_metadata_fn, 'typecheck_metadata_fn')
     self._user_type_check = check.opt_callable_param(
         type_check, 'type_check')
Example #16
def job(
    pipeline_name,
    name=None,
    mode="default",
    solid_selection=None,
    tags_fn=None,
):
    """
    The decorated function will be called as the ``run_config_fn`` of the underlying
    :py:class:`~dagster.JobDefinition` and should take a
    :py:class:`~dagster.JobContext` as its only argument, returning the run config dict for
    the pipeline execution.

    Args:
        pipeline_name (str): The name of the pipeline to execute.
        name (Optional[str]): The name of this job.
        solid_selection (Optional[List[str]]): A list of solid subselection (including single
            solid names) for the pipeline execution e.g. ``['*some_solid+', 'other_solid']``
        mode (Optional[str]): The pipeline mode to apply for the pipeline execution
            (Default: 'default')
        tags_fn (Optional[Callable[[JobContext], Optional[Dict[str, str]]]]): A
            function that generates tags to attach to the pipeline execution. Takes a
            :py:class:`~dagster.JobContext` and returns a dictionary of tags (string
            key-value pairs).
    """

    check.str_param(pipeline_name, "pipeline_name")
    check.opt_str_param(name, "name")
    check.str_param(mode, "mode")
    check.opt_nullable_list_param(solid_selection,
                                  "solid_selection",
                                  of_type=str)
    check.opt_callable_param(tags_fn, "tags_fn")

    def inner(fn):
        check.callable_param(fn, "fn")
        job_name = name or fn.__name__

        return JobDefinition(
            name=job_name,
            pipeline_name=pipeline_name,
            run_config_fn=fn,
            tags_fn=tags_fn,
            mode=mode,
            solid_selection=solid_selection,
        )

    return inner
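A hypothetical usage mirroring the docstring: the decorated function is the run_config_fn and takes a JobContext. The names are illustrative:

    @job(pipeline_name="my_pipeline")
    def my_job(context):
        # run config for the pipeline execution; empty for illustration
        return {}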
Example #17
    def __new__(
        cls,
        run_id=None,
        tags=None,
        event_callback=None,
        loggers=None,
        executor_config=None,
        reexecution_config=None,
        step_keys_to_execute=None,
        mode=None,
    ):
        check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

        tags = check.opt_dict_param(tags, 'tags', key_type=str)

        if EXECUTION_TIME_KEY in tags:
            tags[EXECUTION_TIME_KEY] = float(tags[EXECUTION_TIME_KEY])
        else:
            tags[EXECUTION_TIME_KEY] = time.time()

        return super(RunConfig, cls).__new__(
            cls,
            run_id=check.str_param(run_id, 'run_id') if run_id else make_new_run_id(),
            tags=tags,
            event_callback=check.opt_callable_param(event_callback, 'event_callback'),
            loggers=check.opt_list_param(loggers, 'loggers'),
            executor_config=check.inst_param(executor_config, 'executor_config', ExecutorConfig)
            if executor_config
            else InProcessExecutorConfig(),
            reexecution_config=check.opt_inst_param(
                reexecution_config, 'reexecution_config', ReexecutionConfig
            ),
            step_keys_to_execute=step_keys_to_execute,
            mode=check.opt_str_param(mode, 'mode'),
        )
Example #18
    def __init__(
        self,
        execution_plan,
        run_config,
        pipeline_run,
        instance,
        scoped_resources_builder_cm=None,
        system_storage_data=None,
        intermediate_storage=None,
        raise_on_error=False,
    ):
        scoped_resources_builder_cm = check.opt_callable_param(
            scoped_resources_builder_cm,
            "scoped_resources_builder_cm",
            default=resource_initialization_manager,
        )
        generator = self.event_generator(
            execution_plan,
            run_config,
            pipeline_run,
            instance,
            scoped_resources_builder_cm,
            system_storage_data,
            intermediate_storage,
            raise_on_error,
        )

        self._manager = EventGenerationManager(generator, self.context_type,
                                               raise_on_error)
Example #19
    def __init__(
        self,
        name=None,
        input_defs=None,
        output_defs=None,
        description=None,
        required_resource_keys=None,
        config=None,
        metadata=None,
        step_metadata_fn=None,
    ):
        self.name = check.opt_str_param(name, 'name')
        self.input_defs = check.opt_nullable_list_param(input_defs, 'input_defs', InputDefinition)
        self.output_defs = check.opt_nullable_list_param(
            output_defs, 'output_defs', OutputDefinition
        )

        self.description = check.opt_str_param(description, 'description')

        # resources will be checked within SolidDefinition
        self.required_resource_keys = required_resource_keys

        # config will be checked within SolidDefinition
        self.config = config

        # metadata will be checked within ISolidDefinition
        self.metadata = metadata

        self.step_metadata_fn = check.opt_callable_param(step_metadata_fn, 'step_metadata_fn')
Example #20
 def __new__(
     cls,
     name,
     is_persistent,
     required_resource_keys,
     config_schema=None,
     system_storage_creation_fn=None,
     config=None,
 ):
     return super(SystemStorageDefinition, cls).__new__(
         cls,
         name=check.str_param(name, 'name'),
         is_persistent=check.bool_param(is_persistent, 'is_persistent'),
         config_schema=canonicalize_backcompat_args(
             check_user_facing_opt_config_param(config_schema,
                                                'config_schema'),
             'config_schema',
             check_user_facing_opt_config_param(config, 'config'),
             'config',
             '0.9.0',
         ),
         system_storage_creation_fn=check.opt_callable_param(
             system_storage_creation_fn, 'system_storage_creation_fn'),
         required_resource_keys=frozenset(
             check.set_param(required_resource_keys,
                             'required_resource_keys',
                             of_type=str)),
     )
Example #21
 def __new__(
     cls,
     name,
     is_persistent,
     required_resource_keys,
     config_schema=None,
     intermediate_storage_creation_fn=None,
 ):
     return super(IntermediateStorageDefinition, cls).__new__(
         cls,
         name=check.str_param(name, 'name'),
         is_persistent=check.bool_param(is_persistent, 'is_persistent'),
         config_schema=check_user_facing_opt_config_param(
             config_schema, 'config_schema'),
         intermediate_storage_creation_fn=check.opt_callable_param(
             intermediate_storage_creation_fn,
             'intermediate_storage_creation_fn'),
         required_resource_keys=frozenset(
             check.set_param(
                 required_resource_keys
                 if required_resource_keys else set(),
                 'required_resource_keys',
                 of_type=str,
             )),
     )
Example #22
    def __init__(
        self,
        name,
        input_defs,
        compute_fn,
        output_defs,
        config_schema=None,
        description=None,
        tags=None,
        required_resource_keys=None,
        positional_inputs=None,
        _configured_config_mapping_fn=None,
    ):
        self._compute_fn = check.callable_param(compute_fn, 'compute_fn')
        self._config_schema = check_user_facing_opt_config_param(
            config_schema, 'config_schema')
        self._required_resource_keys = frozenset(
            check.opt_set_param(required_resource_keys,
                                'required_resource_keys',
                                of_type=str))
        self.__configured_config_mapping_fn = check.opt_callable_param(
            _configured_config_mapping_fn, 'config_mapping_fn')

        super(SolidDefinition, self).__init__(
            name=name,
            input_defs=check.list_param(input_defs, 'input_defs',
                                        InputDefinition),
            output_defs=check.list_param(output_defs, 'output_defs',
                                         OutputDefinition),
            description=description,
            tags=check.opt_dict_param(tags, 'tags', key_type=str),
            positional_inputs=positional_inputs,
        )
Example #23
 def __new__(cls, name, alias=None, resource_mapper_fn=None):
     name = check.str_param(name, 'name')
     alias = check.opt_str_param(alias, 'alias')
     resource_mapper_fn = check.opt_callable_param(
         resource_mapper_fn, 'resource_mapper_fn', SolidInvocation.default_resource_mapper_fn
     )
     return super(SolidInvocation, cls).__new__(cls, name, alias, resource_mapper_fn)
Example #24
    def __init__(
        self,
        run_id,
        loggers=None,
        resources=None,
        event_callback=None,
        environment_config=None,
        tags=None,
        persistence_policy=None,
    ):

        if loggers is None:
            loggers = [define_colored_console_logger('dagster')]

        self._logger = CompositeLogger(loggers=loggers)
        self.resources = resources
        self._run_id = check.str_param(run_id, 'run_id')
        self._tags = check.opt_dict_param(tags, 'tags')
        self.events = ExecutionEvents(self)

        # For re-construction purposes later on
        self._event_callback = check.opt_callable_param(event_callback, 'event_callback')
        self._environment_config = environment_config
        self.persistence_policy = check.opt_inst_param(
            persistence_policy, 'persistence_policy', PersistenceStrategy
        )
Example #25
 def __init__(
     self,
     name: Optional[str] = None,
     mode_defs: Optional[List[ModeDefinition]] = None,
     preset_defs: Optional[List[PresetDefinition]] = None,
     description: Optional[str] = None,
     tags: Optional[Dict[str, Any]] = None,
     hook_defs: Optional[Set[HookDefinition]] = None,
     input_defs: Optional[List[InputDefinition]] = None,
     output_defs: Optional[List[OutputDefinition]] = None,
     config_schema: Optional[Dict[str, Any]] = None,
     config_fn: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
 ):
     self.name = check.opt_str_param(name, "name")
     self.mode_definitions = check.opt_list_param(mode_defs, "mode_defs",
                                                  ModeDefinition)
     self.preset_definitions = check.opt_list_param(preset_defs,
                                                    "preset_defs",
                                                    PresetDefinition)
     self.description = check.opt_str_param(description, "description")
     self.tags = check.opt_dict_param(tags, "tags")
     self.hook_defs = check.opt_set_param(hook_defs,
                                          "hook_defs",
                                          of_type=HookDefinition)
     self.input_defs = check.opt_list_param(input_defs,
                                            "input_defs",
                                            of_type=InputDefinition)
     self.did_pass_outputs = output_defs is not None
     self.output_defs = check.opt_nullable_list_param(
         output_defs, "output_defs", of_type=OutputDefinition)
     self.config_schema = config_schema
     self.config_fn = check.opt_callable_param(config_fn, "config_fn")
Example #26
def pipeline_initialization_manager(
    pipeline_def,
    environment_dict,
    pipeline_run,
    instance,
    execution_plan,
    scoped_resources_builder_cm=None,
    system_storage_data=None,
    raise_on_error=False,
):
    scoped_resources_builder_cm = check.opt_callable_param(
        scoped_resources_builder_cm,
        'scoped_resources_builder_cm',
        default=resource_initialization_manager,
    )
    generator = pipeline_initialization_event_generator(
        pipeline_def,
        environment_dict,
        pipeline_run,
        instance,
        execution_plan,
        scoped_resources_builder_cm,
        system_storage_data,
        raise_on_error,
    )
    return EventGenerationManager(generator, SystemPipelineExecutionContext,
                                  raise_on_error)
Example #27
    def __init__(self, execution_plan, retries, sort_key_fn=None):
        self._plan = check.inst_param(execution_plan, 'execution_plan',
                                      ExecutionPlan)
        self._retries = check.inst_param(retries, 'retries', Retries)
        self._sort_key_fn = check.opt_callable_param(sort_key_fn,
                                                     'sort_key_fn',
                                                     _default_sort_key)

        # All steps to be executed start out here in _pending
        self._pending = self._plan.execution_deps()

        # steps move in to these buckets as a result of _update calls
        self._executable = []
        self._pending_skip = []
        self._pending_retry = []
        self._waiting_to_retry = {}

        # then are considered _in_flight when vended via get_steps_to_*
        self._in_flight = set()

        # and finally their terminal state is tracked by these sets, via mark_*
        self._completed = set()
        self._success = set()
        self._failed = set()
        self._skipped = set()

        # Start the show by loading _executable with the set of _pending steps that have no deps
        self._update()
Example #28
    def __init__(
        self,
        name,
        input_defs,
        compute_fn,
        output_defs,
        config_field=None,
        description=None,
        metadata=None,
        required_resource_keys=None,
        step_metadata_fn=None,
    ):
        self._compute_fn = check.callable_param(compute_fn, 'compute_fn')
        self._config_field = check_user_facing_opt_field_param(
            config_field,
            'config_field',
            'of a SolidDefinition or @solid named "{name}"'.format(name=name),
        )
        self._required_resource_keys = check.opt_set_param(
            required_resource_keys, 'required_resource_keys', of_type=str)
        self._step_metadata_fn = check.opt_callable_param(
            step_metadata_fn, 'step_metadata_fn')

        super(SolidDefinition, self).__init__(
            name=name,
            input_defs=check.list_param(input_defs, 'input_defs',
                                        InputDefinition),
            output_defs=check.list_param(output_defs, 'output_defs',
                                         OutputDefinition),
            description=description,
            metadata=metadata,
        )
Example #29
 def __init__(
     self,
     name,
     is_persistent,
     required_resource_keys,
     config_schema=None,
     intermediate_storage_creation_fn=None,
     description=None,
 ):
     warnings.warn(
         "IntermediateStorageDefinition and @intermediate_storage are deprecated in 0.10.0 and "
         "will be removed in 0.11.0. Use ObjectManagerDefinition and @object_manager instead, "
         "which gives you better control over how inputs and outputs are handled and loaded."
     )
     self._name = check_valid_name(name)
     self._is_persistent = check.bool_param(is_persistent, "is_persistent")
     self._config_schema = convert_user_facing_definition_config_schema(
         config_schema)
     self._intermediate_storage_creation_fn = check.opt_callable_param(
         intermediate_storage_creation_fn,
         "intermediate_storage_creation_fn")
     self._required_resource_keys = frozenset(
         check.set_param(
             required_resource_keys if required_resource_keys else set(),
             "required_resource_keys",
             of_type=str,
         ))
     self._description = check.opt_str_param(description, "description")
Example #30
    def __init__(self, execution_plan, retries, sort_key_fn=None):
        self._plan = check.inst_param(execution_plan, "execution_plan",
                                      ExecutionPlan)
        self._retries = check.inst_param(retries, "retries", Retries)
        self._sort_key_fn = check.opt_callable_param(sort_key_fn,
                                                     "sort_key_fn",
                                                     _default_sort_key)

        self._context_guard = False  # Prevent accidental direct use

        # All steps to be executed start out here in _pending
        self._pending = self._plan.execution_deps()

        # steps move in to these buckets as a result of _update calls
        self._executable = []
        self._pending_skip = []
        self._pending_retry = []
        self._pending_abandon = []
        self._waiting_to_retry = {}

        # then are considered _in_flight when vended via get_steps_to_*
        self._in_flight = set()

        # and finally their terminal state is tracked by these sets, via mark_*
        self._success = set()
        self._failed = set()
        self._skipped = set()
        self._abandoned = set()

        # see verify_complete
        self._unknown_state = set()
        self._interrupted = set()

        # Start the show by loading _executable with the set of _pending steps that have no deps
        self._update()