def project_init(ctx, overwrite, dbnd_home, dbnd_system):
    """Initialize the project structure"""
    from dbnd._core.errors import DatabandSystemError
    from dbnd import databand_lib_path

    # Exclude airflow dag examples
    os.environ["SKIP_DAGS_PARSING"] = "True"
    conf_folder = databand_lib_path("conf/project_init")

    # An existing project.cfg marks an already-initialized project.
    already_initialized = os.path.exists(os.path.join(dbnd_home, "project.cfg"))
    if already_initialized and not overwrite:
        raise DatabandSystemError(
            "You are trying to re-initialize your project. You already have dbnd configuration at %s. "
            "You can force project-init by providing --overwrite flag. "
            "If you need to create/update database use `dbnd db init` instead"
            % dbnd_system
        )
    if already_initialized:
        logger.warning(
            "You are re-initializing your project, all files at %s are going to be over written!"
            % dbnd_home
        )

    # Copy the bundled project template into the project directory.
    copy_tree(conf_folder, dbnd_home)
    click.echo("Databand project has been initialized at %s" % dbnd_home)
    return
def project_init(ctx, no_init_db, overwrite, dbnd_home, dbnd_system):
    """Initialize the project structure and local db

    Copies the bundled `conf/project_init` template into ``dbnd_home`` and,
    unless ``no_init_db`` is set, initializes the local database when the
    web component is available.

    :param ctx: click context, used to invoke the `db init` sub-command
    :param no_init_db: if True, skip database initialization
    :param overwrite: allow re-initializing an already initialized project
    :param dbnd_home: target project directory
    :param dbnd_system: dbnd system directory (shown in the error message)
    :raises DatabandSystemError: if the project is already initialized and
        ``overwrite`` is not set
    """
    from dbnd._core.errors import DatabandSystemError
    from dbnd import databand_lib_path

    os.environ["SKIP_DAGS_PARSING"] = "True"  # Exclude airflow dag examples
    conf_folder = databand_lib_path("conf/project_init")
    # NOTE(review): dropped `project_name`/`output_dir` locals - they were
    # computed from dbnd_home but never used anywhere in this function.
    if os.path.exists(os.path.join(dbnd_home, "project.cfg")):
        if not overwrite:
            raise DatabandSystemError(
                "You are trying to re-initialize your project. You already have dbnd configuration at %s. "
                "You can force project-init by providing --overwrite flag. "
                "If you need to create/update database use `dbnd db init` instead"
                % dbnd_system
            )
        logger.warning(
            "You are re-initializing your project, all files at %s are going to be over written!"
            % dbnd_home
        )
    copy_tree(conf_folder, dbnd_home)
    click.echo("Databand project has been initialized at %s" % dbnd_home)

    # Reload system configs so the freshly copied files take effect before db init.
    config.load_system_configs(force=True)
    if no_init_db:
        click.echo("Don't forget to run `dbnd db init` ")
        return

    if is_web_enabled():
        from dbnd_web.cmd_db import init as db_init

        ctx.invoke(db_init)
def _click_echo_jobs(jobs):
    """Print the given scheduled-job dicts as a table using the headers
    stored on the current click context."""
    from dbnd._core.utils.object_utils import tabulate_objects

    headers = get_current_context().obj["headers"]
    job_objects = []
    for job in jobs:
        # flatten the nested "DbndScheduledJob" dict together with the
        # job's top-level fields into a single named tuple
        merged = dict(job.pop("DbndScheduledJob"), **job)
        job_objects.append(ScheduledJobNamedTuple(**merged))
    click.echo(tabulate_objects(job_objects, headers=headers))
def delete(name, force):
    """Delete scheduled job"""
    if force:
        # forced deletion: no preview, no confirmation
        delete_scheduled_job(name)
        return

    to_delete = get_scheduled_jobs(name_pattern=name)
    if not to_delete:
        click.echo("no jobs found matching the given name pattern")
        return

    # show what will be removed and require explicit confirmation
    # (click.confirm aborts the command when the user declines)
    click.echo("the following jobs will be deleted:")
    _click_echo_jobs(to_delete)
    click.confirm("are you sure?", abort=True)
    delete_scheduled_job(name)
def print_help(ctx, task_cls):
    """Echo the `run` command help, extended with a "Task" section listing
    the task's settable parameters (when a task class is given)."""
    from dbnd._core.parameter.parameter_definition import _ParameterKind

    formatter = ctx.make_formatter()
    run.format_help(ctx, formatter)
    if task_cls:
        rows = []
        for param_name, param_obj in task_cls.task_definition.task_params.items():
            # system params and task outputs are not user-settable on the cli
            if param_obj.system or param_obj.kind == _ParameterKind.task_output:
                continue
            rows.append(("-r " + param_name, _help(param_obj.description)))
        with formatter.section("Task"):
            formatter.write_dl(rows)
    click.echo(formatter.getvalue().rstrip("\n"), color=ctx.color)
def _list_tasks(ctx, module, search, is_config):
    """List registered dbnd task (or config) classes whose family name starts
    with `search`, printing each one's user-settable parameters."""
    from dbnd import Config
    from dbnd._core.context.databand_context import new_dbnd_context
    from dbnd._core.parameter.parameter_definition import _ParameterKind

    formatter = ctx.make_formatter()
    load_user_modules(config, modules=module)

    with new_dbnd_context():
        for task_cls in get_task_registry().list_dbnd_task_classes():
            td = task_cls.task_definition
            full_task_family = td.full_task_family
            task_family = td.task_family

            # match on either the short or the fully-qualified family name
            matches = task_family.startswith(search) or full_task_family.startswith(
                search
            )
            if not matches:
                continue
            # config classes and task classes are listed by separate commands
            if issubclass(task_cls, Config) != is_config:
                continue

            rows = []
            for param_name, param_obj in td.task_param_defs.items():
                if param_obj.system or param_obj.kind == _ParameterKind.task_output:
                    continue
                # hide params shared by every task, unless listing configs
                if not is_config and param_name in COMMON_PARAMS:
                    continue
                rows.append((param_name, _help(param_obj.description)))

            if rows:
                section_title = "{task_family} ({full_task_family})".format(
                    full_task_family=full_task_family, task_family=task_family
                )
                with formatter.section(section_title):
                    formatter.write_dl(rows)

    click.echo(formatter.getvalue().rstrip("\n"))
def pause(name):
    """Pause scheduled job"""
    # pausing is modeled as setting the job inactive
    make_active = False
    set_scheduled_job_active(name, make_active)
    click.echo('Scheduled job "%s" paused' % name)
def enable(name):
    """Enable scheduled job"""
    # enabling is modeled as setting the job active
    make_active = True
    set_scheduled_job_active(name, make_active)
    click.echo('Scheduled job "%s" is enabled' % name)
def run(
    ctx,
    is_help,
    task,
    module,
    _sets,
    _sets_config,
    _sets_root,
    _overrides,
    verbose,
    print_task_band,
    describe,
    env,
    parallel,
    conf_file,
    task_version,
    project_name,
    name,
    description,
    run_driver,
    alternative_task_name,
    scheduled_job_name,
    scheduled_date,
    interactive,
    submit_driver,
    submit_tasks,
    disable_web_tracker,
    open_web_tab,
    docker_build_tag,
):
    """
    Run a task or a DAG

    To see tasks use `dbnd show-tasks` (tab completion is available).
    """
    from dbnd._core.context.databand_context import new_dbnd_context, DatabandContext
    from dbnd._core.utils.structures import combine_mappings
    from dbnd import config

    task_name = task

    # Build the config overlay coming from the "main" cli switches.
    # --verbose, --describe, --env, --parallel, --conf-file and --project-name
    # we filter out false flags since otherwise they will always override the config with their falseness
    main_switches = dict(
        databand=filter_dict_remove_false_values(
            dict(
                verbose=verbose > 0,
                task_band=print_task_band,
                describe=describe,
                env=env,
                conf_file=conf_file,
                project_name=project_name,
            )
        ),
        run=filter_dict_remove_false_values(
            dict(
                name=name,
                parallel=parallel,
                description=description,
                is_archived=describe,
            )
        ),
    )

    # Tri-state flags: only applied when explicitly given on the cli
    # (None means "not provided", so the config default wins).
    if submit_driver is not None:
        main_switches["run"]["submit_driver"] = bool(submit_driver)
    if submit_tasks is not None:
        main_switches["run"]["submit_tasks"] = bool(submit_tasks)
    if disable_web_tracker:
        main_switches.setdefault("core", {})["tracker_api"] = "disabled"

    if task_version is not None:
        main_switches["task"] = {"task_version": task_version}

    cmd_line_config = parse_and_build_config_store(
        source="cli", config_values=main_switches
    )

    # click hands these in as tuples; make them mutable lists
    _sets = list(_sets)
    _sets_config = list(_sets_config)
    _sets_root = list(_sets_root)

    # Merge all --set-root mappings into a single dict (later values win).
    root_task_config = {}
    for _set in _sets_root:
        root_task_config = combine_mappings(left=root_task_config, right=_set)

    # remove all "first level" config values, assume that they are for the main task
    # add them to _sets_root
    for _set in _sets:
        for k, v in list(_set.items()):
            # so json-like values won't be included
            if "." not in k and isinstance(v, six.string_types):
                root_task_config[k] = v
                del _set[k]

    # --set, --set-config
    if _sets:
        cmd_line_config.update(_parse_cli(_sets, source="--set"))
    if _sets_config:
        cmd_line_config.update(_parse_cli(_sets_config, source="--set-config"))
    if _overrides:
        # --set-override entries take precedence over regular config layers
        cmd_line_config.update(
            _parse_cli(_overrides, source="--set-override", override=True)
        )
    if interactive:
        cmd_line_config.update(
            _parse_cli([{"run.interactive": True}], source="--interactive")
        )
    if verbose > 1:
        # -v -v also turns on verbose task building
        cmd_line_config.update(
            _parse_cli([{"task_build.verbose": True}], source="-v -v")
        )

    # Apply the accumulated cli overlay to the live config.
    if cmd_line_config:
        config.set_values(cmd_line_config, source="cmdline")
    if verbose:
        logger.info(
            "CLI config: \n%s", pformat_config_store_as_table(cmd_line_config)
        )

    # double checking on bootstrap, as we can run from all kind of locations
    # usually we should be bootstraped already as we run from cli.
    dbnd_bootstrap()
    if not config.getboolean("log", "disabled"):
        configure_basic_logging(None)

    # Carry scheduler metadata into the run when invoked by the scheduler.
    scheduled_run_info = None
    if scheduled_job_name:
        scheduled_run_info = ScheduledRunInfo(
            scheduled_job_name=scheduled_job_name, scheduled_date=scheduled_date
        )

    with tracking_mode_context(tracking=False), new_dbnd_context(
        name="run", module=module
    ) as context:  # type: DatabandContext
        task_registry = get_task_registry()

        # refresh shell-completion cache with the currently registered tasks
        tasks = task_registry.list_dbnd_task_classes()
        completer.refresh(tasks)

        # modules are loaded, we can load the task
        task_cls = None
        if task_name:
            task_cls = task_registry.get_task_cls(task_name)
            if alternative_task_name:
                # run the task under a different name via a dynamic subclass
                task_cls = build_dynamic_task(
                    original_cls=task_cls, new_cls_name=alternative_task_name
                )
                task_name = alternative_task_name

        # --set-root
        # now we can get it config, as it's not main task, we can load config after the configuration is loaded
        if task_cls is not None and root_task_config:
            # adding root task to configuration
            config.set_values(
                {task_cls.task_definition.task_config_section: root_task_config},
                source="--set-root",
            )

        # no task given (or --help requested): print help and stop
        if is_help or not task_name:
            print_help(ctx, task_cls)
            return

        if open_web_tab:
            config.set_values({"run": {"open_web_tracker_in_browser": "True"}})

        if docker_build_tag:
            config.set_values(
                {"kubernetes": {"docker_build_tag": docker_build_tag}}
            )

        if context.settings.system.describe:
            # we want to print describe without triggering real run
            logger.info("Building main task '%s'", task_name)
            root_task = get_task_registry().build_dbnd_task(task_name)
            root_task.ctrl.describe_dag.describe_dag()
            click.echo("Task %s has been described!" % task_name)
            return root_task

        # real execution path
        return context.dbnd_run_task(
            task_or_task_name=task_name,
            run_uid=run_driver,
            scheduled_run_info=scheduled_run_info,
        )
def pause(scheduled_job_service, name):
    """Pause scheduled job"""
    # pausing is modeled as setting the job inactive via the service
    make_active = False
    scheduled_job_service.set_active(name, make_active)
    click.echo('Scheduled job "%s" paused' % name)
def enable(scheduled_job_service, name):
    """Enable scheduled job"""
    # enabling is modeled as setting the job active via the service
    make_active = True
    scheduled_job_service.set_active(name, make_active)
    click.echo('Scheduled job "%s" is enabled' % name)