Beispiel #1
0
def main():
    """Entry point of the ``dbnd`` command line script.

    Steps, in order: call ``_dbnd_exception_handling()``, configure basic
    logging, fix the pyspark imports when ``should_fix_pyspark_imports()``
    says so, register the dbnd plugins, attach every command returned by the
    ``dbnd_get_commands`` plugin hook to ``cli`` and finally invoke ``cli``
    with the program name ``dbnd``.

    Returns:
        Whatever ``cli`` returns, or ``None`` when an exception was logged
        and no exit code was provided for it.

    Raises:
        SystemExit: on keyboard interrupt (code 1), when ``cli`` exits with
            a non-zero code, or when a failure is mapped to an exit code.
    """
    _dbnd_exception_handling()
    configure_basic_logging(None)
    if should_fix_pyspark_imports():
        fix_pyspark_imports()
    register_dbnd_plugins()

    # every hook result is itself an iterable of commands; attach them all
    for plugin_commands in pm.hook.dbnd_get_commands():
        for plugin_command in plugin_commands:
            cli.add_command(plugin_command)

    try:
        return cli(prog_name="dbnd")

    except SystemExit as exit_request:
        if exit_request.code != 0:
            # propagate the failure code unchanged
            sys.exit(exit_request.code)
        # A zero exit code is replaced by a plain exit() call: per the
        # original note, databricks cannot handle exiting with an exit code.
        exit()

    except KeyboardInterrupt:
        sys.exit(1)

    except Exception as error:
        # imported lazily, only when a failure actually has to be reported
        from dbnd._core.failures import get_databand_error_message

        message, exit_code = get_databand_error_message(
            ex=error, args=sys.argv[1:]
        )
        logger.error(message)
        # without an exit code the error is only logged and main() returns
        if exit_code is not None:
            sys.exit(exit_code)
Beispiel #2
0
def run(
    ctx,
    is_help,
    task,
    module,
    _sets,
    _sets_config,
    _sets_root,
    _overrides,
    verbose,
    describe,
    env,
    parallel,
    conf_file,
    task_version,
    project_name,
    name,
    description,
    run_driver,
    alternative_task_name,
    scheduled_job_name,
    scheduled_date,
    interactive,
    submit_driver,
    submit_tasks,
    disable_web_tracker,
):
    """
    Run a task or a DAG

    To see tasks use `dbnd show-tasks` (tab completion is available).
    """
    # NOTE(review): the docstring above is kept verbatim; presumably it is
    # shown to the user as the command help text -- confirm before editing.
    #
    # Flow of this function:
    #   1. translate the command line switches into a config store,
    #   2. layer --set / --set-config / --set-override and friends on top,
    #   3. apply the result to the global config, bootstrap, set up logging,
    #   4. resolve the task inside a new dbnd context and either print help
    #      or run it (returning the result of ``context.dbnd_run_task``).

    from dbnd import config
    from dbnd._core.context.databand_context import DatabandContext, new_dbnd_context
    from dbnd._core.utils.structures import combine_mappings

    task_name = task

    # --verbose, --describe, --env, --conf-file and --project-name
    # False values are filtered out, otherwise an unset flag would always
    # override the configured value with its falseness.
    databand_switches = filter_dict_remove_false_values(
        {
            "verbose": verbose > 0,
            "describe": describe,
            "env": env,
            "conf_file": conf_file,
            "project_name": project_name,
        }
    )
    # --name, --parallel, --description (and is_archived, driven by --describe)
    run_switches = filter_dict_remove_false_values(
        {
            "name": name,
            "parallel": parallel,
            "description": description,
            "is_archived": describe,
        }
    )

    # tri-state flags: only forwarded when explicitly given on the command line
    if submit_driver is not None:
        run_switches["submit_driver"] = bool(submit_driver)
    if submit_tasks is not None:
        run_switches["submit_tasks"] = bool(submit_tasks)

    main_switches = {"databand": databand_switches, "run": run_switches}
    if disable_web_tracker:
        main_switches["core"] = {"tracker_api": "disabled"}
    if task_version is not None:
        main_switches["task"] = {"task_version": task_version}

    cmd_line_config = parse_and_build_config_store(
        source="cli", config_values=main_switches
    )

    _sets = list(_sets)
    _sets_config = list(_sets_config)
    _sets_root = list(_sets_root)

    # --set-root values are merged into one mapping for the main task
    root_task_config = {}
    for root_values in _sets_root:
        root_task_config = combine_mappings(left=root_task_config, right=root_values)

    # "First level" --set values (no section prefix) are assumed to belong to
    # the main task: move them from the --set mapping into the root config.
    for set_values in _sets:
        for key, value in list(set_values.items()):
            # non-string (json-like) values and dotted keys stay where they are
            if "." in key or not isinstance(value, six.string_types):
                continue
            root_task_config[key] = value
            del set_values[key]

    # --set, --set-config
    if _sets:
        set_store = _parse_cli(_sets, source="--set")
        cmd_line_config.update(set_store)
    if _sets_config:
        set_config_store = _parse_cli(_sets_config, source="--set-config")
        cmd_line_config.update(set_config_store)
    # --set-override
    if _overrides:
        override_store = _parse_cli(
            _overrides, source="--set-override", override=True
        )
        cmd_line_config.update(override_store)
    # --interactive
    if interactive:
        interactive_store = _parse_cli(
            [{"run.interactive": True}], source="--interactive"
        )
        cmd_line_config.update(interactive_store)
    # a second -v additionally turns on verbose task building
    if verbose > 1:
        build_verbose_store = _parse_cli(
            [{"task_build.verbose": True}], source="-v -v"
        )
        cmd_line_config.update(build_verbose_store)

    if cmd_line_config:
        config.set_values(cmd_line_config, source="cmdline")
    if verbose:
        logger.info("CLI config: \n%s", pformat_config_store_as_table(cmd_line_config))

    # double checking on bootstrap, as we can run from all kinds of locations;
    # usually we are bootstrapped already because we run from the cli.
    dbnd_bootstrap()
    if not config.getboolean("log", "disabled"):
        configure_basic_logging(None)

    scheduled_run_info = (
        ScheduledRunInfo(
            scheduled_job_name=scheduled_job_name, scheduled_date=scheduled_date
        )
        if scheduled_job_name
        else None
    )

    with new_dbnd_context(
        name="run", module=module
    ) as context:  # type: DatabandContext
        task_registry = get_task_registry()
        completer.refresh(task_registry.list_dbnd_task_classes())

        # modules are loaded, so the task class can be resolved now
        task_cls = None
        if task_name:
            task_cls = task_registry.get_task_cls(task_name)
            if alternative_task_name:
                # run a dynamically built class under the alternative name
                task_cls = build_dynamic_task(
                    original_cls=task_cls, new_cls_name=alternative_task_name
                )
                task_name = alternative_task_name

        # --set-root
        # The config section of the main task is only known once its class is
        # resolved, so the root values are applied after the other settings.
        if task_cls is not None and root_task_config:
            root_section = task_cls.task_definition.task_config_section
            config.set_values({root_section: root_task_config}, source="--set-root")

        if is_help or not task_name:
            print_help(ctx, task_cls)
            return

        return context.dbnd_run_task(
            task_or_task_name=task_name,
            run_uid=run_driver,
            scheduled_run_info=scheduled_run_info,
        )
Beispiel #3
0
def cmd_run(
    ctx,
    is_help,
    task,
    module,
    _sets,
    _sets_config,
    _sets_root,
    _overrides,
    _extend,
    verbose,
    print_task_band,
    describe,
    env,
    parallel,
    conf_file,
    task_version,
    project,
    name,
    description,
    run_driver,
    override_run_uid,
    alternative_task_name,
    job_name,
    scheduled_job_name,
    scheduled_date,
    interactive,
    submit_driver,
    submit_tasks,
    disable_web_tracker,
    open_web_tab,
    docker_build_tag,
):
    """
    Run a task or a DAG

    To see all available tasks use `dbnd show-tasks` (tab completion is available).
    `dbnd show-configs` will print all available configs.
    """
    # NOTE(review): the docstring above is kept verbatim; presumably it is
    # shown to the user as the command help text -- confirm before editing.
    #
    # Flow of this function:
    #   1. load the user modules and collect the command line switches,
    #   2. build one config store from the switches and the --set family,
    #   3. apply it to the global config, bootstrap and set up basic logging,
    #   4. print help when asked (or when no task is given), otherwise
    #      describe or run the task inside a new dbnd context.

    from dbnd import config
    from dbnd._core.context.databand_context import DatabandContext, new_dbnd_context
    from dbnd._core.utils.structures import combine_mappings

    task_registry = get_task_registry()

    # user modules must be loaded before any task class lookup
    load_user_modules(dbnd_config=config, modules=module)

    task_name = task

    # --verbose, --describe, --env, --parallel, --conf-file and --project
    # Switches are grouped by config section; the cleanup call below removes
    # the empty ones so unset flags do not override configured values.
    main_switches = {
        "databand": {
            "verbose": verbose > 0,
            "print_task_band": print_task_band,
            "describe": describe,
            "env": env,
            "conf_file": conf_file,
            "project": project,
        },
        "run": {
            "name": name,
            "parallel": parallel,
            "interactive": interactive,
            "description": description,
            "is_archived": describe,
            "open_web_tracker_in_browser": open_web_tab,
            "submit_driver": _nullable_flag(submit_driver),
            "submit_tasks": _nullable_flag(submit_tasks),
        },
        "kubernetes": {"docker_build_tag": docker_build_tag},
        "task": {"task_version": task_version},
        # a second -v additionally turns on verbose task building
        "task_build": {"verbose": True if verbose > 1 else None},
        "core": {"tracker_api": "disabled" if disable_web_tracker else None},
    }
    main_switches = cleanup_empty_switches(main_switches)

    _sets = list(_sets)
    _sets_config = list(_sets_config)
    _sets_root = list(_sets_root)

    # --set-root values are merged into one mapping for the main task
    root_task_config = {}
    for root_values in _sets_root:
        root_task_config = combine_mappings(left=root_task_config, right=root_values)

    # "First level" --set values (no section prefix) are assumed to belong to
    # the main task: move them from the --set mapping into the root config.
    for set_values in _sets:
        for key, value in list(set_values.items()):
            # non-string (json-like) values and dotted keys stay where they are
            if "." in key or not isinstance(value, six.string_types):
                continue
            root_task_config[key] = value
            del set_values[key]

    cmd_line_config = parse_and_build_config_store(
        source="cli", config_values=main_switches
    )

    # --set, --set-config
    if _sets:
        set_store = _parse_cli(_sets, source="--set")
        cmd_line_config.update(set_store)
    if _sets_config:
        set_config_store = _parse_cli(_sets_config, source="--set-config")
        cmd_line_config.update(set_config_store)
    # --extend-config
    if _extend:
        extend_store = _parse_cli(_extend, source="--extend-config", extend=True)
        cmd_line_config.update(extend_store)
    # --set-override
    if _overrides:
        override_store = _parse_cli(
            _overrides,
            source="--set-override",
            priority=ConfigValuePriority.OVERRIDE,
        )
        cmd_line_config.update(override_store)

    # --set-root
    if root_task_config:
        # the root values are stored under the config section of the main task
        root_task_cls = task_registry.get_task_cls(task_name)
        root_section = root_task_cls.task_definition.task_config_section
        root_store = parse_and_build_config_store(
            config_values={root_section: root_task_config}, source="--set-root"
        )
        cmd_line_config.update(root_store)

    # UPDATE CURRENT CONFIG with CLI values
    if cmd_line_config:
        if verbose:
            logger.info(
                "CLI config: \n%s", pformat_config_store_as_table(cmd_line_config)
            )
        config.set_values(cmd_line_config, source="cmdline")

    # double checking on bootstrap, as we can run from all kinds of locations;
    # usually we are bootstrapped already because we run from the cli.
    dbnd_bootstrap()

    # initialize basic logging (until we get to the context logging)
    if not config.getboolean("log", "disabled"):
        configure_basic_logging(None)

    scheduled_run_info = (
        ScheduledRunInfo(
            scheduled_job_name=scheduled_job_name, scheduled_date=scheduled_date
        )
        if scheduled_job_name
        else None
    )

    # update completer
    if config.getboolean("databand", "completer"):
        completer.refresh(task_registry.list_dbnd_task_classes())

    # without a task there is nothing to resolve: show the generic help
    if not task_name:
        print_help(ctx, None)
        return

    # bootstrap and modules are loaded, so the task class can be resolved now
    task_cls = task_registry.get_task_cls(task_name)
    if is_help:
        print_help(ctx, task_cls)
        return

    with tracking_mode_context(tracking=False), new_dbnd_context(
        name="run"
    ) as context:  # type: DatabandContext
        if not context.settings.system.describe:
            return context.dbnd_run_task(
                task_or_task_name=task_name,
                force_task_name=alternative_task_name,
                job_name=job_name or alternative_task_name or task_name,
                run_uid=run_driver or override_run_uid,
                existing_run=run_driver is not None,
                scheduled_run_info=scheduled_run_info,
                project=project,
            )

        # describe mode: build and describe the task without a real run
        logger.info("Building main task '%s'", task_name)
        root_task = get_task_registry().build_dbnd_task(task_name)
        root_task.ctrl.describe_dag.describe_dag()
        # plain print instead of click.echo: the click version in use has a
        # bug under python 2, see https://github.com/pallets/click/issues/564
        print("Task %s has been described!" % task_name)
        return root_task
Beispiel #4
0
def pytest_configure(config):
    """Prepare dbnd for a test session.

    Presumably this is the standard pytest ``pytest_configure`` hook (judging
    by its name and signature -- confirm against the plugin/conftest setup).
    It configures basic logging, switches dbnd into unit test mode and runs
    the dbnd bootstrap. The ``config`` argument is not used.
    """
    # NOTE(review): the call order is presumably significant (unit test mode
    # is set before bootstrapping) -- confirm before reordering.
    configure_basic_logging(None)
    set_dbnd_unit_test_mode()
    dbnd_bootstrap()