def __init__(self, name, pipeline_dict=None, pipeline_defs=None):
    '''Initialize from lazily-built and/or already-built pipelines.

    Args:
        name (str): Stored on ``self.name`` after ``check.str_param``.
        pipeline_dict (Optional[Dict[str, callable]]): Pipeline name mapped to
            a callable. Every value must pass ``check.is_callable``.
        pipeline_defs (Optional[List[PipelineDefinition]]): Definitions that
            are placed directly into the pipeline cache under their ``name``.

    A definition whose name is already registered (either as a key of
    ``pipeline_dict`` or by an earlier entry of ``pipeline_defs``) fails the
    ``check.invariant`` duplicate guard.
    '''
    self.name = check.str_param(name, 'name')
    lazy_factories = check.opt_dict_param(pipeline_dict, 'pipeline_dict', key_type=str)
    eager_defs = check.opt_list_param(pipeline_defs, 'pipeline_defs', PipelineDefinition)

    for factory in lazy_factories.values():
        check.is_callable(factory, 'Value in pipeline_dict must be function')

    self._lazy_pipeline_dict = lazy_factories
    self._pipeline_cache = {}
    self._pipeline_names = set(lazy_factories)

    # Already-built definitions skip the lazy path and go straight to the cache.
    for pipeline_def in eager_defs:
        pipeline_name = pipeline_def.name
        check.invariant(
            pipeline_name not in self._pipeline_names,
            'Duplicate pipelines named {name}'.format(name=pipeline_name),
        )
        self._pipeline_names.add(pipeline_name)
        self._pipeline_cache[pipeline_name] = pipeline_def

    self._all_pipelines = None
    self._solid_defs = None
def load(self):
    '''Invoke the entry point function and store its result on ``self.object``.

    If a previous call already loaded the module, it is reloaded through
    ``reloader.reload`` before the entry point is looked up again.

    When ``self.coerce_to_repo`` is set, a returned ``PipelineDefinition`` is
    wrapped in a ``RepositoryDefinition``; a returned ``RepositoryDefinition``
    is stored as-is; anything else raises ``InvalidPipelineLoadingComboError``.

    Returns:
        The value stored on ``self.object``.
    '''
    if self.loaded:
        reloader.reload(self.module)
    self.loaded = True

    entry_point = getattr(self.module, self.fn_name)
    check.is_callable(entry_point)
    loaded = entry_point()

    # Eventually this class will be generic and not coupled to
    # Pipeline / Repository types. Tracking this issue here:
    # https://github.com/dagster-io/dagster/issues/246
    if not self.coerce_to_repo:
        self.object = loaded
    elif isinstance(loaded, RepositoryDefinition):
        self.object = loaded
    elif isinstance(loaded, PipelineDefinition):
        # Wrap the single pipeline in a repository whose factory returns it.
        self.object = RepositoryDefinition(
            name=EMPHERMAL_NAME,
            pipeline_dict={loaded.name: lambda: loaded},
        )
    else:
        raise InvalidPipelineLoadingComboError(
            'entry point must return a repository or pipeline')

    return self.object
def map(self, fn):
    '''Apply ``fn`` to a handle for this output and wrap what it returns.

    Args:
        fn (callable): Called once with an ``InvokedSolidOutputHandle`` built
            from ``self.solid_name`` and ``self.output_name``.

    Returns:
        * an ``InvokedSolidDynamicOutputWrapper`` when ``fn`` returns a handle,
        * a tuple of wrappers when ``fn`` returns a tuple made only of handles,
        * ``None`` when ``fn`` returns ``None``,
        * the value itself when ``fn`` already returns a wrapper.

    Any other return value is reported through ``check.failed``.
    '''
    check.is_callable(fn)
    result = fn(InvokedSolidOutputHandle(self.solid_name, self.output_name))

    def _wrap(handle):
        # Re-express a plain output handle as a dynamic output wrapper.
        return InvokedSolidDynamicOutputWrapper(handle.solid_name, handle.output_name)

    if isinstance(result, InvokedSolidOutputHandle):
        return _wrap(result)

    if isinstance(result, tuple) and all(
            isinstance(item, InvokedSolidOutputHandle) for item in result):
        return tuple(_wrap(item) for item in result)

    if result is None:
        return None

    if isinstance(result, InvokedSolidDynamicOutputWrapper):
        return result

    check.failed(
        "Could not handle output from map function invoked on "
        f"{self.solid_name}:{self.output_name}, received {result}")
def load_module_target_function(module_target_function):
    '''Import the target module, call the named function and bundle the result.

    Args:
        module_target_function (ModuleTargetFunction): Supplies ``module_name``
            and ``fn_name``.

    Returns:
        DynamicObject: Built from the imported module, the function, both
        names, and the value the function returned when called with no
        arguments.
    '''
    check.inst_param(module_target_function, 'module_target_function', ModuleTargetFunction)

    target_module_name = module_target_function.module_name
    module = importlib.import_module(target_module_name)

    target_fn_name = module_target_function.fn_name
    fn = getattr(module, target_fn_name)
    check.is_callable(fn)

    return DynamicObject(module, fn, target_module_name, target_fn_name, object=fn())
def __init__(self, name, pipeline_dict):
    '''Store the name and the mapping of pipeline factories.

    Args:
        name (str): Stored on ``self.name`` after ``check.str_param``.
        pipeline_dict (Dict[str, callable]): String keys mapped to callables.
            Every value must pass ``check.is_callable``.
    '''
    self.name = check.str_param(name, 'name')

    check.dict_param(pipeline_dict, 'pipeline_dict', key_type=str)
    for factory in pipeline_dict.values():
        check.is_callable(factory, 'Value in pipeline_dict must be function')
    self.pipeline_dict = pipeline_dict

    # Starts out empty; nothing in this constructor populates it.
    self._pipeline_cache = {}
def load_file_target_function(file_target_function):
    '''Load a python file as a module, call the named function, bundle the result.

    The module name is the file's base name with its extension removed.

    Args:
        file_target_function (FileTargetFunction): Supplies ``python_file``
            and ``fn_name``.

    Returns:
        DynamicObject: Built from the loaded module, the function, the derived
        module name, the function name, and the value the function returned
        when called with no arguments.
    '''
    check.inst_param(file_target_function, 'file_target_function', FileTargetFunction)

    python_file = file_target_function.python_file
    file_name = os.path.basename(python_file)
    module_name = os.path.splitext(file_name)[0]

    # NOTE(review): `imp` is deprecated (removed in Python 3.12); migrating to
    # importlib would also require touching the file-level imports.
    module = imp.load_source(module_name, python_file)

    fn_name = file_target_function.fn_name
    fn = getattr(module, fn_name)
    check.is_callable(fn)

    return DynamicObject(module, fn, module_name, fn_name, object=fn())
def __init__(self, name, pipeline_dict=None, pipeline_defs=None, experimental=None):
    '''Initialize pipelines plus the experimental scheduler configuration.

    Args:
        name (str): Stored on ``self._name`` after ``check.str_param``.
        pipeline_dict (Optional[Dict[str, callable]]): Pipeline name mapped to
            a callable. Every value must pass ``check.is_callable``.
        pipeline_defs (Optional[List[PipelineDefinition]]): Definitions that
            are placed directly into the pipeline cache under their ``name``.
        experimental (Optional[dict]): May carry ``'scheduler'`` (checked as a
            subclass of ``Scheduler``) and ``'schedule_defs'`` (checked as a
            list of ``ScheduleDefinition``).

    Duplicate pipeline names and duplicate schedule names each fail a
    ``check.invariant`` guard.
    '''
    self._name = check.str_param(name, 'name')
    lazy_factories = check.opt_dict_param(pipeline_dict, 'pipeline_dict', key_type=str)
    eager_defs = check.opt_list_param(pipeline_defs, 'pipeline_defs', PipelineDefinition)

    # Experimental arguments
    # TODO: Extract scheduler and scheduler_defs from RepositoryDefinition
    # https://github.com/dagster-io/dagster/issues/1693
    experimental_opts = check.opt_dict_param(experimental, 'experimental')
    scheduler_cls = check.opt_subclass_param(
        experimental_opts.get('scheduler'), 'scheduler', Scheduler
    )
    schedule_list = check.opt_list_param(
        experimental_opts.get('schedule_defs'), 'schedule_defs', ScheduleDefinition
    )

    for factory in lazy_factories.values():
        check.is_callable(factory, 'Value in pipeline_dict must be function')

    self._lazy_pipeline_dict = lazy_factories
    self._pipeline_cache = {}
    self._pipeline_names = set(lazy_factories)

    # Already-built definitions skip the lazy path and go straight to the cache.
    for pipeline_def in eager_defs:
        pipeline_name = pipeline_def.name
        check.invariant(
            pipeline_name not in self._pipeline_names,
            'Duplicate pipelines named {name}'.format(name=pipeline_name),
        )
        self._pipeline_names.add(pipeline_name)
        self._pipeline_cache[pipeline_name] = pipeline_def

    self._scheduler_type = scheduler_cls

    # Index schedules by name, rejecting repeated names.
    self._schedules = {}
    for schedule_def in schedule_list:
        schedule_name = schedule_def.name
        check.invariant(
            schedule_name not in self._schedules,
            'Duplicate schedules named {name}'.format(name=schedule_name),
        )
        self._schedules[schedule_name] = schedule_def

    self._all_pipelines = None
    self._solid_defs = None
def reload_dynamic_object(dynamic_obj):
    '''Reload the module behind ``dynamic_obj`` and rebuild the object from it.

    Args:
        dynamic_obj (DynamicObject): Supplies ``module``, ``module_name`` and
            ``fn_name``.

    Returns:
        DynamicObject: A new instance holding the reloaded module, the function
        looked up on it again, the original names, and the value that function
        returned when called with no arguments.
    '''
    check.inst_param(dynamic_obj, 'dynamic_obj', DynamicObject)

    module_name = dynamic_obj.module_name
    fn_name = dynamic_obj.fn_name

    reloaded_module = importlib.reload(dynamic_obj.module)

    # Look the function up again so the post-reload definition is used.
    reloaded_fn = getattr(reloaded_module, fn_name)
    check.is_callable(reloaded_fn)

    return DynamicObject(
        object=reloaded_fn(),
        module=reloaded_module,
        fn=reloaded_fn,
        module_name=module_name,
        fn_name=fn_name,
    )
def _log(self, method, orig_message, message_props):
    '''Send one semi-structured message to every logger in ``self.loggers``.

    Args:
        method (str): Name of the logger method to look up on each logger.
        orig_message (str): The message as supplied by the caller.
        message_props (dict): Extra key/value pairs for the message. The keys
            ``extra``, ``exc_info``, ``orig_message``, ``message``,
            ``log_message_id`` and ``log_timestamp`` are rejected.
    '''
    check.str_param(method, 'method')
    check.str_param(orig_message, 'orig_message')
    check.dict_param(message_props, 'message_props')

    unsupported = 'do not allow until explicit support is handled'
    rejected_keys = (
        ('extra', unsupported),
        ('exc_info', unsupported),
        ('orig_message', 'orig_message reserved value'),
        ('message', 'message reserved value'),
        ('log_message_id', 'log_message_id reserved value'),
        ('log_timestamp', 'log_timestamp reserved value'),
    )
    for key, complaint in rejected_keys:
        check.invariant(key not in message_props, complaint)

    # Build the full property set used to render the semi-structured message
    # via _kv_message. Precedence, lowest to highest: synthesized props,
    # self.tags, then the caller's message_props.
    all_props = {
        'orig_message': orig_message,
        'log_message_id': str(uuid.uuid4()),
        'log_timestamp': datetime.datetime.utcnow().isoformat(),
        'run_id': self.run_id,
    }
    all_props.update(self.tags.items())
    all_props.update(message_props.items())

    single_line_msg = _kv_message(all_props.items())
    multiline_msg = _kv_message(all_props.items(), multiline=True)

    # Everything dagster injects travels under the single reserved key
    # DAGSTER_META_KEY. The logging module copies every key of `extra` onto
    # the LogRecord instance, so using one reserved key avoids collisions
    # with LogRecord's own attributes (see makeLogRecord in the stdlib
    # logging source).
    for logger in self.loggers:
        emit = check.is_callable(getattr(logger, method))
        # Only the default dagster logger receives the multiline rendering.
        if logger.name == DAGSTER_DEFAULT_LOGGER:
            text = multiline_msg
        else:
            text = single_line_msg
        emit(text, extra={DAGSTER_META_KEY: all_props})
def __init__(self, name, pipeline_dict, enforce_uniqueness=True):
    '''Store the name, the pipeline factories and the uniqueness flag.

    Args:
        name (str): Stored on ``self.name`` after ``check.str_param``.
        pipeline_dict (Dict[str, callable]): See top-level class documentation.
            Every value must pass ``check.is_callable``.
        enforce_uniqueness: Stored on ``self.enforce_uniqueness`` unchanged;
            it is not type-checked here.
    '''
    self.name = check.str_param(name, 'name')

    check.dict_param(pipeline_dict, 'pipeline_dict', key_type=str)
    for factory in pipeline_dict.values():
        check.is_callable(factory, 'Value in pipeline_dict must be function')
    self.pipeline_dict = pipeline_dict

    # Starts out empty; nothing in this constructor populates it.
    self._pipeline_cache = {}

    self.enforce_uniqueness = enforce_uniqueness
def test_is_callable():
    '''check.is_callable returns callables and raises CheckError otherwise.'''

    def noop():
        pass

    # A callable is handed back; a description argument does not change that.
    assert check.is_callable(noop) == noop
    assert check.is_callable(lambda: None)
    assert check.is_callable(lambda: None, 'some desc')

    # Non-callables are rejected.
    for not_callable in (None, 1):
        with pytest.raises(CheckError):
            check.is_callable(not_callable)

    # The supplied description shows up in the error message.
    with pytest.raises(CheckError, match='some other desc'):
        check.is_callable(1, 'some other desc')
def __init__(self, name, pipeline_dict, repo_config=None, enforce_solid_def_uniqueness=True):
    '''Store the name, pipeline factories, repo config and uniqueness flag.

    Args:
        name (str): Stored on ``self.name`` after ``check.str_param``.
        pipeline_dict (Dict[str, callable]): String keys mapped to callables.
            Every value must pass ``check.is_callable``.
        repo_config (Optional[dict]): Passed through ``check.opt_dict_param``
            and stored on ``self.repo_config``.
        enforce_solid_def_uniqueness (bool): Passed through
            ``check.bool_param`` and stored under the same name.
    '''
    self.name = check.str_param(name, 'name')

    check.dict_param(pipeline_dict, 'pipeline_dict', key_type=str)
    for factory in pipeline_dict.values():
        check.is_callable(factory, 'Value in pipeline_dict must be function')
    self.pipeline_dict = pipeline_dict

    # Starts out empty; nothing in this constructor populates it.
    self._pipeline_cache = {}

    self.repo_config = check.opt_dict_param(repo_config, 'repo_config')

    uniqueness_flag = check.bool_param(
        enforce_solid_def_uniqueness, 'enforce_solid_def_uniqueness'
    )
    self.enforce_solid_def_uniqueness = uniqueness_flag
def perform_load(entry):
    '''Call the function named by ``entry`` with its keyword arguments.

    Args:
        entry: Object exposing ``module``, ``fn_name`` and ``kwargs``.

    Returns:
        Whatever the looked-up function returns.
    '''
    target = getattr(entry.module, entry.fn_name)
    check.is_callable(target)
    result = target(**entry.kwargs)
    return result
def __init__(self, callback):
    '''Initialize the base handler, then keep the validated callback.

    Args:
        callback (callable): Must pass ``check.is_callable``; stored on
            ``self.callback``.
    '''
    super(StructuredLoggerHandler, self).__init__()
    validated_callback = check.is_callable(callback, 'callback')
    self.callback = validated_callback
def _invoke_logger_method(*args, **kwargs):
    '''Forward the call to the same-named method of every logger.

    ``self`` and ``name`` are free variables taken from the enclosing scope.
    '''
    for target_logger in self.loggers:
        bound_method = getattr(target_logger, name)
        check.is_callable(bound_method)(*args, **kwargs)
def perform_load(self):
    '''Call the function named ``self.fn_name`` on ``self.module``.

    Returns:
        Whatever the looked-up function returns when called with no arguments.
    '''
    target = getattr(self.module, self.fn_name)
    check.is_callable(target)
    result = target()
    return result