def inject_some_params():
    """Push the ``user_func`` config section derived from a fresh ``MyConfig22``.

    Parameter ``a`` is set to the config object's ``config_id`` and ``b`` to
    the literal string ``"2"``; the values are registered under the
    ``inject_some_params`` source.
    """
    cfg = MyConfig22()
    logging.info("my_c: %s", cfg.config_id)

    user_func_values = {"a": cfg.config_id, "b": "2"}
    config.set_values(
        config_values={"user_func": user_func_values},
        source="inject_some_params",
    )
def test_predict_wine_quality(self):
    """Run ``predict_wine_quality`` with ``raw_data`` injected as a root-task setting."""
    wine_inputs = {"raw_data": data_repo.wines_full}
    section_name = predict_wine_quality.task_cls.task_definition.task_config_section

    # Same shape the CLI produces for --set-root: {<task config section>: {...}}
    config.set_values({section_name: wine_inputs}, source="--set-root")

    assert_run_task(predict_wine_quality.task())
def run(
    ctx,
    is_help,
    task,
    module,
    _sets,
    _sets_config,
    _sets_root,
    _overrides,
    verbose,
    describe,
    env,
    parallel,
    conf_file,
    task_version,
    project_name,
    name,
    description,
    run_driver,
    alternative_task_name,
    scheduled_job_name,
    scheduled_date,
    interactive,
    submit_driver,
    submit_tasks,
    disable_web_tracker,
):
    """
    Run a task or a DAG

    To see tasks use `dbnd show-tasks` (tab completion is available).
    """
    # NOTE(review): this looks like a CLI command callback (decorators are not
    # visible here), so the docstring above is presumably the user-facing help
    # text and is intentionally left short -- confirm before extending it.
    #
    # Flow of this function:
    #   1. collect the simple CLI switches into `main_switches` and build a
    #      config store from them (source="cli");
    #   2. merge the --set-root dicts, plus every dot-less string key found in
    #      the --set dicts, into `root_task_config`;
    #   3. layer --set / --set-config / --set-override / --interactive / -v -v
    #      on top of the config store and push it into the global `config`;
    #   4. bootstrap dbnd, optionally configure basic logging;
    #   5. inside a new dbnd context: resolve the task class, apply
    #      `root_task_config` to its config section, then either print help
    #      (returning None) or return the result of `context.dbnd_run_task`.
    #
    # `verbose` is compared with integers (`> 0`, `> 1`), so it is expected to
    # be a count. `_sets`, `_sets_config` and `_sets_root` are iterables of
    # dicts (they are copied with `list()`, and `_sets` items are mutated below).
    from dbnd._core.context.databand_context import new_dbnd_context, DatabandContext
    from dbnd._core.utils.structures import combine_mappings
    from dbnd import config

    task_name = task

    # --verbose, --describe, --env, --parallel, --conf-file and --project-name
    # We filter out falsy flags: otherwise an unset flag would always override
    # the value coming from the configuration files with its "falseness".
    main_switches = dict(
        databand=filter_dict_remove_false_values(
            dict(
                verbose=verbose > 0,
                describe=describe,
                env=env,
                conf_file=conf_file,
                project_name=project_name,
            )
        ),
        run=filter_dict_remove_false_values(
            dict(
                name=name,
                parallel=parallel,
                description=description,
                is_archived=describe,
            )
        ),
    )

    # These switches are tri-state: None means "not given on the command line",
    # so only an explicit value (True or False) is written into the config.
    if submit_driver is not None:
        main_switches["run"]["submit_driver"] = bool(submit_driver)
    if submit_tasks is not None:
        main_switches["run"]["submit_tasks"] = bool(submit_tasks)
    if disable_web_tracker:
        main_switches.setdefault("core", {})["tracker_api"] = "disabled"

    if task_version is not None:
        main_switches["task"] = {"task_version": task_version}

    cmd_line_config = parse_and_build_config_store(
        source="cli", config_values=main_switches
    )

    # Copy the incoming sequences so they can be iterated more than once.
    # Note that this is a shallow copy: the dicts inside `_sets` are still the
    # caller's objects and are modified in place further down.
    _sets = list(_sets)
    _sets_config = list(_sets_config)
    _sets_root = list(_sets_root)

    # Fold every --set-root dict into a single mapping.
    root_task_config = {}
    for _set in _sets_root:
        root_task_config = combine_mappings(left=root_task_config, right=_set)

    # Remove all "first level" config values from --set (keys without a "."),
    # assume that they are meant for the main task and move them into
    # `root_task_config`.
    for _set in _sets:
        # iterate over a snapshot, as entries are deleted from `_set` in the loop
        for k, v in list(_set.items()):
            # only plain string values are moved, so json-like values won't be included
            if "." not in k and isinstance(v, six.string_types):
                root_task_config[k] = v
                del _set[k]

    # --set, --set-config
    if _sets:
        cmd_line_config.update(_parse_cli(_sets, source="--set"))
    if _sets_config:
        cmd_line_config.update(_parse_cli(_sets_config, source="--set-config"))
    # --set-override
    if _overrides:
        cmd_line_config.update(
            _parse_cli(_overrides, source="--set-override", override=True)
        )
    # --interactive
    if interactive:
        cmd_line_config.update(
            _parse_cli([{"run.interactive": True}], source="--interactive")
        )
    # a verbosity count above one additionally enables task_build.verbose
    if verbose > 1:
        cmd_line_config.update(
            _parse_cli([{"task_build.verbose": True}], source="-v -v")
        )

    # Publish everything collected from the command line into the global config.
    if cmd_line_config:
        config.set_values(cmd_line_config, source="cmdline")
    # NOTE(review): the nesting of this `if verbose` relative to the
    # `if cmd_line_config` above was reconstructed -- confirm against the
    # canonical source.
    if verbose:
        logger.info("CLI config: \n%s", pformat_config_store_as_table(cmd_line_config))

    # Double-check the bootstrap, as we can be run from all kinds of locations;
    # usually we are already bootstrapped because we run from the CLI.
    dbnd_bootstrap()
    if not config.getboolean("log", "disabled"):
        configure_basic_logging(None)

    # Scheduling info is only built when a scheduled job name was supplied.
    scheduled_run_info = None
    if scheduled_job_name:
        scheduled_run_info = ScheduledRunInfo(
            scheduled_job_name=scheduled_job_name, scheduled_date=scheduled_date
        )

    with new_dbnd_context(
        name="run", module=module
    ) as context:  # type: DatabandContext
        task_registry = get_task_registry()

        # refresh the completer with the task classes currently registered
        tasks = task_registry.list_dbnd_task_classes()
        completer.refresh(tasks)

        # modules are loaded, we can load the task
        task_cls = None
        if task_name:
            task_cls = task_registry.get_task_cls(task_name)
            # NOTE(review): nesting of the alternative-name branch under
            # `if task_name` was reconstructed -- confirm against the
            # canonical source.
            if alternative_task_name:
                # build a task class under the alternative name and run that
                # name from here on
                task_cls = build_dynamic_task(
                    original_cls=task_cls, new_cls_name=alternative_task_name
                )
                task_name = alternative_task_name

        # --set-root
        # The root-task values can only be applied now: the config section
        # name comes from the task class, which is resolved only after the
        # rest of the configuration has been loaded.
        if task_cls is not None:
            if root_task_config:
                # adding root task to configuration
                config.set_values(
                    {task_cls.task_definition.task_config_section: root_task_config},
                    source="--set-root",
                )

        # Help was requested, or there is nothing to run: print help and
        # return None without starting a run.
        if is_help or not task_name:
            print_help(ctx, task_cls)
            return

        return context.dbnd_run_task(
            task_or_task_name=task_name,
            run_uid=run_driver,
            scheduled_run_info=scheduled_run_info,
        )
def cmd_run(
    ctx,
    is_help,
    task,
    module,
    _sets,
    _sets_config,
    _sets_root,
    _overrides,
    _extend,
    verbose,
    print_task_band,
    describe,
    env,
    parallel,
    conf_file,
    task_version,
    project,
    name,
    description,
    run_driver,
    override_run_uid,
    alternative_task_name,
    job_name,
    scheduled_job_name,
    scheduled_date,
    interactive,
    submit_driver,
    submit_tasks,
    disable_web_tracker,
    open_web_tab,
    docker_build_tag,
):
    """
    Run a task or a DAG

    To see all available tasks use `dbnd show-tasks` (tab completion is available).
    `dbnd show-configs` will print all available configs.
    """
    # NOTE(review): this looks like a click command callback (decorators are
    # not visible here), so the docstring above is presumably the user-facing
    # help text and is intentionally left short -- confirm before extending it.
    #
    # Flow of this function:
    #   1. load user modules, then collect the simple CLI switches into
    #      `main_switches`;
    #   2. merge the --set-root dicts, plus every dot-less string key found in
    #      the --set dicts, into `root_task_config`;
    #   3. build one config store from the switches, --set, --set-config,
    #      --extend-config, --set-override and --set-root, and push it into
    #      the global `config`;
    #   4. bootstrap dbnd, optionally configure basic logging and refresh the
    #      completer;
    #   5. print help and return None if no task was given or help was asked;
    #   6. inside a new dbnd context either describe the task (returning the
    #      built root task) or return the result of `context.dbnd_run_task`.
    #
    # `verbose` is compared with integers (`> 0`, `> 1`), so it is expected to
    # be a count. `_sets`, `_sets_config` and `_sets_root` are iterables of
    # dicts (they are copied with `list()`, and `_sets` items are mutated below).
    from dbnd import config
    from dbnd._core.context.databand_context import DatabandContext, new_dbnd_context
    from dbnd._core.utils.structures import combine_mappings

    task_registry = get_task_registry()

    # we need to do it before we are looking for the task cls
    load_user_modules(dbnd_config=config, modules=module)

    task_name = task

    # --verbose, --describe, --env, --parallel, --conf-file and --project
    # Unset flags must not override the configuration with their "falseness";
    # `cleanup_empty_switches` below is expected to drop them (helper is not
    # visible here -- confirm exactly which values it removes).
    main_switches = dict(
        databand=dict(
            verbose=verbose > 0,
            print_task_band=print_task_band,
            describe=describe,
            env=env,
            conf_file=conf_file,
            project=project,
        ),
        run=dict(
            name=name,
            parallel=parallel,
            interactive=interactive,
            description=description,
            is_archived=describe,
            open_web_tracker_in_browser=open_web_tab,
            submit_driver=_nullable_flag(submit_driver),
            submit_tasks=_nullable_flag(submit_tasks),
        ),
        kubernetes=dict(docker_build_tag=docker_build_tag),
        task=dict(task_version=task_version),
        # None (rather than False) is used for the "not requested" case
        task_build=dict(verbose=True if verbose > 1 else None),
        core=dict(tracker_api="disabled" if disable_web_tracker else None),
    )

    main_switches = cleanup_empty_switches(main_switches)

    # Copy the incoming sequences so they can be iterated more than once.
    # Note that this is a shallow copy: the dicts inside `_sets` are still the
    # caller's objects and are modified in place further down.
    _sets = list(_sets)
    _sets_config = list(_sets_config)
    _sets_root = list(_sets_root)

    # Fold every --set-root dict into a single mapping.
    root_task_config = {}
    for _set in _sets_root:
        root_task_config = combine_mappings(left=root_task_config, right=_set)

    # Remove all "first level" config values from --set (keys without a "."),
    # assume that they are meant for the main task and move them into
    # `root_task_config`.
    for _set in _sets:
        # iterate over a snapshot, as entries are deleted from `_set` in the loop
        for k, v in list(_set.items()):
            # only plain string values are moved, so json-like values won't be included
            if "." not in k and isinstance(v, six.string_types):
                root_task_config[k] = v
                del _set[k]

    cmd_line_config = parse_and_build_config_store(source="cli", config_values=main_switches)

    # --set, --set-config
    if _sets:
        cmd_line_config.update(_parse_cli(_sets, source="--set"))
    if _sets_config:
        cmd_line_config.update(_parse_cli(_sets_config, source="--set-config"))
    # --extend-config
    if _extend:
        cmd_line_config.update(
            _parse_cli(_extend, source="--extend-config", extend=True))
    # --set-override: parsed with the OVERRIDE priority
    if _overrides:
        cmd_line_config.update(
            _parse_cli(
                _overrides,
                source="--set-override",
                priority=ConfigValuePriority.OVERRIDE,
            ))

    # --set-root
    # The task class is looked up here only to learn the name of its config
    # section; the root-task values are stored under that section.
    if root_task_config:
        task_cls = task_registry.get_task_cls(task_name)
        task_section = task_cls.task_definition.task_config_section
        # adding root task to configuration
        cmd_line_config.update(
            parse_and_build_config_store(
                config_values={task_section: root_task_config}, source="--set-root"))

    # UPDATE CURRENT CONFIG with CLI values
    if cmd_line_config:
        if verbose:
            logger.info("CLI config: \n%s", pformat_config_store_as_table(cmd_line_config))
        config.set_values(cmd_line_config, source="cmdline")

    # Double-check the bootstrap, as we can be run from all kinds of locations;
    # usually we are already bootstrapped because we run from the CLI.
    dbnd_bootstrap()

    # initialize basic logging (until we get to the context logging)
    if not config.getboolean("log", "disabled"):
        configure_basic_logging(None)

    # Scheduling info is only built when a scheduled job name was supplied.
    scheduled_run_info = None
    if scheduled_job_name:
        scheduled_run_info = ScheduledRunInfo(
            scheduled_job_name=scheduled_job_name, scheduled_date=scheduled_date)

    # update completer (only when enabled via the [databand] completer option)
    if config.getboolean("databand", "completer"):
        tasks = task_registry.list_dbnd_task_classes()
        completer.refresh(tasks)

    # bootstrap and modules are loaded, we can load the task
    task_cls = None
    if task_name:
        task_cls = task_registry.get_task_cls(task_name)

    # No task given: print the general help and return None.
    if not task_name:
        print_help(ctx, None)
        return

    # Help requested for a concrete task: print it and return None.
    if is_help:
        print_help(ctx, task_cls)
        return

    with tracking_mode_context(tracking=False), new_dbnd_context(
            name="run") as context:  # type: DatabandContext
        if context.settings.system.describe:
            # we want to print describe without triggering real run
            logger.info("Building main task '%s'", task_name)
            root_task = get_task_registry().build_dbnd_task(task_name)
            root_task.ctrl.describe_dag.describe_dag()
            # currently there is a bug with the click version we have when using python 2,
            # so we don't use the click.echo function
            # https://github.com/pallets/click/issues/564
            print("Task %s has been described!" % task_name)
            return root_task

        # `run_driver`, when given, takes precedence over `override_run_uid`
        # as the run uid and also marks the run as an existing one.
        return context.dbnd_run_task(
            task_or_task_name=task_name,
            force_task_name=alternative_task_name,
            job_name=job_name or alternative_task_name or task_name,
            run_uid=run_driver or override_run_uid,
            existing_run=run_driver is not None,
            scheduled_run_info=scheduled_run_info,
            project=project,
        )
def user_code_load_config():
    """Register the values returned by ``AutoloadedConfig`` under ``"user_config"``."""
    autoloaded_values = AutoloadedConfig().get_autoloaded_config()
    config.set_values(autoloaded_values, "user_config")