Beispiel #1
0
    def __init__(self, name, pipeline_dict=None, pipeline_defs=None):
        '''Record the name and set up pipeline bookkeeping.

        Args:
            name (str): Stored on ``self.name`` after ``check.str_param``.
            pipeline_dict (Optional[Dict[str, callable]]): String keys mapped
                to callables. The values are only checked for callability
                here; none of them is invoked.
            pipeline_defs (Optional[List[PipelineDefinition]]): Definitions
                placed straight into the cache under their own ``name``.

        A definition whose name is already registered (through a
        ``pipeline_dict`` key or an earlier definition) fails the
        ``check.invariant`` call below.
        '''
        self.name = check.str_param(name, 'name')

        lazy_pipelines = check.opt_dict_param(
            pipeline_dict, 'pipeline_dict', key_type=str
        )
        eager_pipelines = check.opt_list_param(
            pipeline_defs, 'pipeline_defs', PipelineDefinition
        )

        for factory in lazy_pipelines.values():
            check.is_callable(factory, 'Value in pipeline_dict must be function')

        self._lazy_pipeline_dict = lazy_pipelines

        # Local aliases point at the very same objects stored on the instance.
        cache = self._pipeline_cache = {}
        names = self._pipeline_names = set(lazy_pipelines)
        for pipeline_def in eager_pipelines:
            check.invariant(
                pipeline_def.name not in names,
                'Duplicate pipelines named {name}'.format(name=pipeline_def.name),
            )
            names.add(pipeline_def.name)
            cache[pipeline_def.name] = pipeline_def

        self._all_pipelines = None
        self._solid_defs = None
Beispiel #2
0
    def load(self):
        '''Call the entry-point function on ``self.module`` and keep its result.

        If ``self.loaded`` is already truthy the module is first passed to
        ``reloader.reload``. The function named ``self.fn_name`` is then
        looked up on ``self.module``, checked for callability and invoked
        with no arguments.

        When ``self.coerce_to_repo`` is truthy, a ``RepositoryDefinition``
        result is stored as-is, a ``PipelineDefinition`` result is wrapped in
        a new ``RepositoryDefinition``, and any other result raises
        ``InvalidPipelineLoadingComboError``. Otherwise the result is stored
        unchanged.

        Returns:
            The value stored on ``self.object``.
        '''
        if self.loaded:
            reloader.reload(self.module)
        self.loaded = True

        entry_point = getattr(self.module, self.fn_name)
        check.is_callable(entry_point)
        produced = entry_point()

        # Eventually this class will be generic and not coupled to
        # Pipeline / Repository types. Tracking this issue here:
        # https://github.com/dagster-io/dagster/issues/246
        if not self.coerce_to_repo or isinstance(produced, RepositoryDefinition):
            self.object = produced
            return self.object

        if not isinstance(produced, PipelineDefinition):
            raise InvalidPipelineLoadingComboError(
                'entry point must return a repository or pipeline')

        # Wrap the single pipeline in a repository keyed by the pipeline's name.
        self.object = RepositoryDefinition(
            name=EMPHERMAL_NAME, pipeline_dict={produced.name: lambda: produced})
        return self.object
Beispiel #3
0
    def map(self, fn):
        '''Apply ``fn`` to a handle for this output and normalise what it returns.

        ``fn`` is called with an ``InvokedSolidOutputHandle`` built from
        ``self.solid_name`` and ``self.output_name``. Its result is handled as
        follows:

        * a single ``InvokedSolidOutputHandle`` becomes one
          ``InvokedSolidDynamicOutputWrapper``;
        * a tuple made up solely of ``InvokedSolidOutputHandle`` items becomes
          a tuple of wrappers;
        * ``None`` or an existing ``InvokedSolidDynamicOutputWrapper`` is
          returned unchanged;
        * anything else is reported through ``check.failed``.
        '''
        check.is_callable(fn)
        mapped = fn(InvokedSolidOutputHandle(self.solid_name, self.output_name))

        if isinstance(mapped, InvokedSolidOutputHandle):
            return InvokedSolidDynamicOutputWrapper(mapped.solid_name,
                                                    mapped.output_name)

        if isinstance(mapped, tuple) and all(
                isinstance(handle, InvokedSolidOutputHandle)
                for handle in mapped):
            return tuple(
                InvokedSolidDynamicOutputWrapper(handle.solid_name,
                                                 handle.output_name)
                for handle in mapped)

        if mapped is None or isinstance(mapped,
                                        InvokedSolidDynamicOutputWrapper):
            return mapped

        check.failed(
            "Could not handle output from map function invoked on "
            f"{self.solid_name}:{self.output_name}, received {mapped}")
Beispiel #4
0
def load_module_target_function(module_target_function):
    '''Import a module by name, resolve the target function and invoke it.

    Args:
        module_target_function (ModuleTargetFunction): Supplies the
            ``module_name`` to import and the ``fn_name`` to look up on it.

    Returns:
        DynamicObject: Built from the imported module, the resolved function,
        the two names, and the result of calling the function with no
        arguments.
    '''
    check.inst_param(
        module_target_function, 'module_target_function', ModuleTargetFunction
    )

    module_name = module_target_function.module_name
    module = importlib.import_module(module_name)

    fn_name = module_target_function.fn_name
    target_fn = getattr(module, fn_name)
    check.is_callable(target_fn)

    return DynamicObject(
        module, target_fn, module_name, fn_name, object=target_fn()
    )
Beispiel #5
0
    def __init__(self, name, pipeline_dict):
        '''Store the name and the mapping of pipeline callables.

        Args:
            name (str): Stored on ``self.name`` after ``check.str_param``.
            pipeline_dict (Dict[str, callable]): Required dict with string
                keys. Every value must be callable; none is invoked here.
        '''
        self.name = check.str_param(name, 'name')

        check.dict_param(pipeline_dict, 'pipeline_dict', key_type=str)
        for factory in pipeline_dict.values():
            check.is_callable(factory, 'Value in pipeline_dict must be function')

        self.pipeline_dict = pipeline_dict
        # Starts out empty; nothing in this constructor fills it.
        self._pipeline_cache = {}
Beispiel #6
0
def load_file_target_function(file_target_function):
    '''Load a Python source file as a module, then resolve and invoke a function.

    The module name is the file's base name with its extension removed.

    Args:
        file_target_function (FileTargetFunction): Supplies ``python_file``
            (path of the source file) and ``fn_name`` (attribute to look up
            on the loaded module).

    Returns:
        DynamicObject: Built from the loaded module, the resolved function,
        the derived module name, the function name, and the result of calling
        the function with no arguments.
    '''
    check.inst_param(
        file_target_function, 'file_target_function', FileTargetFunction
    )

    source_path = file_target_function.python_file
    file_name = os.path.basename(source_path)
    module_name = os.path.splitext(file_name)[0]

    # NOTE(review): the `imp` module is deprecated and was removed in
    # Python 3.12; an importlib-based loader will be needed there.
    module = imp.load_source(module_name, source_path)

    fn_name = file_target_function.fn_name
    target_fn = getattr(module, fn_name)
    check.is_callable(target_fn)

    return DynamicObject(
        module, target_fn, module_name, fn_name, object=target_fn()
    )
Beispiel #7
0
    def __init__(self, name, pipeline_dict=None, pipeline_defs=None, experimental=None):
        '''Record the name and set up pipeline and schedule bookkeeping.

        Args:
            name (str): Stored on ``self._name`` after ``check.str_param``.
            pipeline_dict (Optional[Dict[str, callable]]): String keys mapped
                to callables. The values are only checked for callability
                here; none of them is invoked.
            pipeline_defs (Optional[List[PipelineDefinition]]): Definitions
                placed straight into the pipeline cache under their ``name``.
            experimental (Optional[dict]): Read for two optional keys:
                ``'scheduler'`` (checked as a subclass of ``Scheduler``) and
                ``'schedule_defs'`` (checked as a list of
                ``ScheduleDefinition``).

        Duplicate pipeline names and duplicate schedule names each fail a
        ``check.invariant`` call.
        '''
        self._name = check.str_param(name, 'name')

        lazy_pipelines = check.opt_dict_param(
            pipeline_dict, 'pipeline_dict', key_type=str
        )
        eager_pipelines = check.opt_list_param(
            pipeline_defs, 'pipeline_defs', PipelineDefinition
        )

        # Experimental arguments
        # TODO: Extract scheduler and scheduler_defs from RepositoryDefinition
        # https://github.com/dagster-io/dagster/issues/1693
        experimental_opts = check.opt_dict_param(experimental, 'experimental')
        scheduler_type = check.opt_subclass_param(
            experimental_opts.get('scheduler'), 'scheduler', Scheduler
        )
        schedule_list = check.opt_list_param(
            experimental_opts.get('schedule_defs'), 'schedule_defs', ScheduleDefinition
        )

        for factory in lazy_pipelines.values():
            check.is_callable(factory, 'Value in pipeline_dict must be function')

        self._lazy_pipeline_dict = lazy_pipelines

        # Local aliases point at the very same objects stored on the instance.
        cache = self._pipeline_cache = {}
        names = self._pipeline_names = set(lazy_pipelines)
        for pipeline_def in eager_pipelines:
            check.invariant(
                pipeline_def.name not in names,
                'Duplicate pipelines named {name}'.format(name=pipeline_def.name),
            )
            names.add(pipeline_def.name)
            cache[pipeline_def.name] = pipeline_def

        self._scheduler_type = scheduler_type
        schedules = self._schedules = {}
        for schedule_def in schedule_list:
            check.invariant(
                schedule_def.name not in schedules,
                'Duplicate schedules named {name}'.format(name=schedule_def.name),
            )
            schedules[schedule_def.name] = schedule_def

        self._all_pipelines = None
        self._solid_defs = None
Beispiel #8
0
def reload_dynamic_object(dynamic_obj):
    '''Reload the module behind ``dynamic_obj`` and build a fresh ``DynamicObject``.

    Args:
        dynamic_obj (DynamicObject): Its ``module`` is reloaded with
            ``importlib.reload``; its ``module_name`` and ``fn_name`` are
            carried over unchanged.

    Returns:
        DynamicObject: Holds the reloaded module, the function re-resolved on
        it, and the result of calling that function with no arguments.
    '''
    check.inst_param(dynamic_obj, 'dynamic_obj', DynamicObject)

    module_name = dynamic_obj.module_name
    fn_name = dynamic_obj.fn_name

    reloaded_module = importlib.reload(dynamic_obj.module)
    target_fn = getattr(reloaded_module, fn_name)
    check.is_callable(target_fn)

    fresh_object = target_fn()
    return DynamicObject(
        object=fresh_object,
        module=reloaded_module,
        fn=target_fn,
        module_name=module_name,
        fn_name=fn_name,
    )
Beispiel #9
0
    def _log(self, method, orig_message, message_props):
        '''Send ``orig_message`` with structured properties to every logger.

        Args:
            method (str): Name of the method looked up on each logger in
                ``self.loggers``.
            orig_message (str): The caller's message, stored under the
                ``'orig_message'`` property.
            message_props (dict): Extra properties. It must not contain the
                keys ``extra``, ``exc_info``, ``orig_message``, ``message``,
                ``log_message_id`` or ``log_timestamp``.

        The emitted text is produced by ``_kv_message`` from the merged
        properties. The merged property dict is also passed to each logger
        under ``extra[DAGSTER_META_KEY]``.
        '''
        check.str_param(method, 'method')
        check.str_param(orig_message, 'orig_message')
        check.dict_param(message_props, 'message_props')

        for unsupported_key in ('extra', 'exc_info'):
            check.invariant(
                unsupported_key not in message_props,
                'do not allow until explicit support is handled',
            )

        reserved_keys = (
            ('orig_message', 'orig_message reserved value'),
            ('message', 'message reserved value'),
            ('log_message_id', 'log_message_id reserved value'),
            ('log_timestamp', 'log_timestamp reserved value'),
        )
        for reserved_key, complaint in reserved_keys:
            check.invariant(reserved_key not in message_props, complaint)

        message_id = str(uuid.uuid4())
        timestamp = datetime.datetime.utcnow().isoformat()

        # Build the full property set used to render the semi-structured log
        # text: synthesized values first, then tags, then caller properties,
        # so later sources overwrite earlier ones on a key clash.
        all_props = {
            'orig_message': orig_message,
            'log_message_id': message_id,
            'log_timestamp': timestamp,
            'run_id': self.run_id,
        }
        all_props.update(self.tags)
        all_props.update(message_props)

        single_line_text = _kv_message(all_props.items())
        multiline_text = _kv_message(all_props.items(), multiline=True)

        # All dagster metadata travels under the single reserved key
        # DAGSTER_META_KEY. The logging module copies every key of `extra`
        # into the LogRecord's internal dictionary, so a dedicated key avoids
        # clashing with LogRecord's own attributes.
        for logger in self.loggers:
            emit = check.is_callable(getattr(logger, method))
            if logger.name == DAGSTER_DEFAULT_LOGGER:
                text = multiline_text
            else:
                text = single_line_text
            emit(text, extra={DAGSTER_META_KEY: all_props})
Beispiel #10
0
    def __init__(self, name, pipeline_dict, enforce_uniqueness=True):
        '''Store the name, the pipeline callables and the uniqueness flag.

        Args:
            name (str): Stored on ``self.name`` after ``check.str_param``.
            pipeline_dict (Dict[str, callable]): Required dict with string
                keys. Every value must be callable; none is invoked here.
                See top-level class documentation.
            enforce_uniqueness: Stored on ``self.enforce_uniqueness`` without
                any validation. Defaults to ``True``.
        '''
        self.name = check.str_param(name, 'name')

        check.dict_param(pipeline_dict, 'pipeline_dict', key_type=str)
        for factory in pipeline_dict.values():
            check.is_callable(factory, 'Value in pipeline_dict must be function')

        self.pipeline_dict = pipeline_dict
        # Starts out empty; nothing in this constructor fills it.
        self._pipeline_cache = {}
        self.enforce_uniqueness = enforce_uniqueness
Beispiel #11
0
def test_is_callable():
    '''Check that ``check.is_callable`` passes callables and rejects others.'''
    def sample():
        pass

    # A callable is handed back, with or without a description argument.
    assert check.is_callable(sample) == sample
    assert check.is_callable(lambda: None)
    assert check.is_callable(lambda: None, 'some desc')

    # Non-callables raise CheckError.
    for not_callable in (None, 1):
        with pytest.raises(CheckError):
            check.is_callable(not_callable)

    # The supplied description shows up in the error message.
    with pytest.raises(CheckError, match='some other desc'):
        check.is_callable(1, 'some other desc')
Beispiel #12
0
    def __init__(self, name, pipeline_dict, repo_config=None, enforce_solid_def_uniqueness=True):
        '''Store the name, the pipeline callables and repository options.

        Args:
            name (str): Stored on ``self.name`` after ``check.str_param``.
            pipeline_dict (Dict[str, callable]): Required dict with string
                keys. Every value must be callable; none is invoked here.
            repo_config (Optional[dict]): Stored on ``self.repo_config`` after
                ``check.opt_dict_param``.
            enforce_solid_def_uniqueness (bool): Stored on
                ``self.enforce_solid_def_uniqueness`` after
                ``check.bool_param``. Defaults to ``True``.
        '''
        self.name = check.str_param(name, 'name')

        check.dict_param(pipeline_dict, 'pipeline_dict', key_type=str)
        for factory in pipeline_dict.values():
            check.is_callable(factory, 'Value in pipeline_dict must be function')

        self.pipeline_dict = pipeline_dict
        # Starts out empty; nothing in this constructor fills it.
        self._pipeline_cache = {}

        self.repo_config = check.opt_dict_param(repo_config, 'repo_config')
        self.enforce_solid_def_uniqueness = check.bool_param(
            enforce_solid_def_uniqueness, 'enforce_solid_def_uniqueness'
        )
Beispiel #13
0
def perform_load(entry):
    '''Call the function named ``entry.fn_name`` on ``entry.module``.

    The function is checked for callability and then invoked with
    ``entry.kwargs`` expanded as keyword arguments.

    Returns:
        Whatever the resolved function returns.
    '''
    target_fn = getattr(entry.module, entry.fn_name)
    check.is_callable(target_fn)
    return target_fn(**entry.kwargs)
Beispiel #14
0
 def __init__(self, callback):
     '''Run the parent initialiser, then store the callability-checked ``callback``.'''
     super(StructuredLoggerHandler, self).__init__()
     # The stored attribute is the value returned by the check helper.
     checked_callback = check.is_callable(callback, 'callback')
     self.callback = checked_callback
Beispiel #15
0
 def _invoke_logger_method(*args, **kwargs):
     '''Forward the call to the attribute called ``name`` on each logger in ``self.loggers``.'''
     for target_logger in self.loggers:
         bound_method = getattr(target_logger, name)
         # Call through the value returned by the callability check.
         check.is_callable(bound_method)(*args, **kwargs)
Beispiel #16
0
 def perform_load(self):
     '''Call the function named ``self.fn_name`` on ``self.module`` with no arguments.

     Returns:
         Whatever the resolved function returns.
     '''
     entry_point = getattr(self.module, self.fn_name)
     check.is_callable(entry_point)
     return entry_point()