Ejemplo n.º 1
0
    def auto_save_param(self, parameter, original_value, current_value):
        # type: (ParameterDefinition, Any, Any) -> None
        """
        Dump ``current_value`` into the ``original_value`` target and report the write.

        :param parameter: definition of the output parameter being saved
        :param original_value: the target the value is written to
        :param current_value: the runtime value to write
        :raises: the error built by
            ``friendly_error.task_execution.failed_to_save_value_to_target`` when
            anything in the save step fails.

        The write is reported to the tracker of the current task run (when one
        exists) on both success and failure; a failure to report is only logged.
        """
        # the task run doesn't always exist, so tracking below is optional
        active_task_run = try_get_current_task_run()
        write_status = DbndTargetOperationStatus.OK
        try:
            # Workaround: we don't want to change the parameter for outputs
            # dynamically, but a proper value type is needed to "dump" the preview
            # and other meta. For now it is refreshed for in-memory targets only.
            if isinstance(original_value, InMemoryTarget):
                refreshed_type = get_value_type_of_obj(
                    current_value, parameter.value_type
                )
                parameter.value_type = refreshed_type

            parameter.dump_to_target(original_value, current_value)
        except Exception as ex:
            write_status = DbndTargetOperationStatus.NOK
            save_error = friendly_error.task_execution.failed_to_save_value_to_target(
                ex, self.task, parameter, original_value, current_value
            )
            raise save_error
        finally:
            # runs for both outcomes, so the tracker sees OK as well as NOK writes
            if active_task_run:
                try:
                    report = dict(
                        parameter=parameter,
                        target=original_value,
                        value=current_value,
                        operation_type=DbndTargetOperationType.write,
                        operation_status=write_status,
                    )
                    active_task_run.tracker.log_parameter_data(**report)
                except Exception as log_error:
                    logger.warning(
                        "Failed to log target to tracking store. %s", log_error
                    )
Ejemplo n.º 2
0
def bash_script(
    script=None,
    check_retcode=0,
    cwd=None,
    env=None,
    dbnd_env=True,
    output_encoding="utf-8",
    popen_kwargs=None,
):
    # type:( str, Optional[int],str, Dict[str,str], bool, str, Dict[str,Any]) -> int
    """
    Write ``script`` to a ``bash_cmd.sh`` file and run it with ``bash`` via ``bash_cmd``.

    :param script: text of the bash script to run (required)
    :param check_retcode: forwarded to ``bash_cmd``
    :param cwd: working directory for the command; defaults to the directory
        that holds the generated script
    :param env: forwarded to ``bash_cmd``
    :param dbnd_env: forwarded to ``bash_cmd``
    :param output_encoding: forwarded to ``bash_cmd``
    :param popen_kwargs: forwarded to ``bash_cmd``
    :return: whatever ``bash_cmd.func`` returns
    :raises TypeError: when ``script`` is not provided
    """
    import tempfile

    if script is None:
        # fail before touching the file system instead of leaving an empty script behind
        raise TypeError("bash_script() requires a 'script' to run")

    # we need a working folder to create bash script
    task_run = try_get_current_task_run()
    if task_run:
        script_dir = str(task_run.local_task_run_root)
    else:
        # no task run -> no task folder; os.path.join(None, ...) would raise,
        # so fall back to a fresh temporary directory
        script_dir = tempfile.mkdtemp(prefix="dbnd_bash_script_")

    bash_script_path = os.path.join(script_dir, "bash_cmd.sh")
    with open(bash_script_path, "wb") as bs:
        # str.encode works on both Python 2 and 3, unlike bytes(script, encoding)
        bs.write(script.encode("utf-8"))

    log_metric("bash_script", bash_script_path)

    logger.info("Bash script location: %s", bash_script_path)
    args = ["bash", bash_script_path]
    return bash_cmd.func(
        args=args,
        check_retcode=check_retcode,
        cwd=cwd or script_dir,
        env=env,
        dbnd_env=dbnd_env,
        output_encoding=output_encoding,
        popen_kwargs=popen_kwargs,
    )
Ejemplo n.º 3
0
def set_external_resource_urls(links):
    # type: (dict[str, str]) -> None
    """
    Attach ``links`` as external resources of the currently running task.

    This is best effort: a non-dict argument, a dict without any truthy value,
    a missing running task, or an error raised while setting the links only
    produce a warning in the log and never an exception.

    @param links: map between the name of the resource and the link to the resource
    """
    if not isinstance(links, dict):
        logger.warning(
            "Failed to add links as external resources: links is not a dict")
        return

    # entries with an empty/None link are dropped before anything is reported
    usable_links = dict(
        (name, url) for name, url in six.iteritems(links) if url
    )
    if not usable_links:
        logger.warning(
            "Failed to add links as external resources: links is empty dict or dict with None values "
        )
        return

    current_task_run = try_get_current_task_run()
    if current_task_run is None:
        logger.warning(
            "Failed to add links as external resources: There is no running task"
        )
        return

    try:
        current_task_run.set_external_resource_urls(usable_links)
    except Exception as e:
        message = "Failed to add links as external resources: raised {e}".format(e=e)
        logger.warning(message)
Ejemplo n.º 4
0
    def from_env(cls, current_run):
        # type: (DatabandRun) -> RootRunInfo
        """
        Build the root run info for ``current_run``.

        When a databand run is already active, its ``root_run_info`` is reused
        (with ``root_task_run_uid`` replaced by the current task run uid, if a
        task run is active). Otherwise the values come from the environment
        variables, and ``current_run`` itself is treated as the root when no
        root run uid is set there.
        """
        enclosing_run = try_get_databand_run()
        if not enclosing_run:
            # nothing is running in this process: take the values from env
            env_run_uid = os.environ.get(DBND_ROOT_RUN_UID)
            env_run_url = os.environ.get(DBND_ROOT_RUN_TRACKER_URL)
            env_task_run_uid = os.environ.get(DBND_PARENT_TASK_RUN_UID)

            if not env_run_uid:
                # current run is the main run
                env_run_uid = current_run.run_uid
                env_run_url = current_run.tracker.run_url

            return cls(
                root_run_uid=env_run_uid,
                root_run_url=env_run_url,
                root_task_run_uid=env_task_run_uid,
            )

        # take from parent
        inherited_info = enclosing_run.root_run_info
        active_task_run = try_get_current_task_run()
        if not active_task_run:
            return inherited_info

        # update parent run info with the task run we are currently in
        return attr.evolve(
            inherited_info, root_task_run_uid=active_task_run.task_run_uid
        )
Ejemplo n.º 5
0
def heartbeat_manual(log=False, check_time=None):
    """
    Run this module's file as a child Python process and wait for it to exit.

    :param log: when true and a current task run exists, the child's
        stdout/stderr are written to a ``hearbeat_manual.log`` partition under
        the run's local root; otherwise the child inherits stdout.
    :param check_time: not used by this function; kept so existing callers that
        pass it keep working. The default is ``None`` rather than a timestamp
        computed once at import time.
    """
    import logging

    logger = logging.getLogger(__name__)

    # A compiled module reports "<name>.pyc"; the child must run the ".py" source.
    # (The previous replace(".pyc", "py") dropped the dot and produced "<name>py".)
    script_path = __file__[:-1] if __file__.endswith(".pyc") else __file__
    cmd = [sys.executable, script_path]

    task_run = try_get_current_task_run() if log else None
    if log and task_run is None:
        # there is no run folder to put the log file into
        logger.warning(
            "No current task run found, heartbeat output will not be written to a log file"
        )

    heartbeat_log_fp = None
    try:
        if task_run is not None:
            heartbeat_log_file = task_run.run.run_local_root.partition(
                name="hearbeat_manual.log")
            heartbeat_log_fp = heartbeat_log_file.open("w")
            logger.error(
                "Starting heartbeat with log at %s using cmd: %s",
                heartbeat_log_file,
                subprocess.list2cmdline(cmd),
            )
        else:
            logger.error("Starting MANUAL using cmd: %s",
                         subprocess.list2cmdline(cmd))

        sp = subprocess.Popen(
            cmd, stdout=heartbeat_log_fp, stderr=subprocess.STDOUT)

        sleep(10)
        exit_code = sp.wait()
    finally:
        # the handle was previously leaked; assumes open("w") returns a
        # file-like object with close()
        if heartbeat_log_fp is not None:
            heartbeat_log_fp.close()

    if exit_code == 0:
        logger.error(" MANUAL finished with 0")
    else:
        logger.error(" MANUAL finished with %s", exit_code)
Ejemplo n.º 6
0
 def auto_save_param(self, parameter, original_value, current_value):
     # type: (ParameterDefinition, Any, Any) -> None
     """
     Dump ``current_value`` into the ``original_value`` target and report the write.

     :param parameter: definition of the output parameter being saved
     :param original_value: the target the value is written to
     :param current_value: the runtime value to write
     :raises: the error built by
         ``friendly_error.task_execution.failed_to_save_value_to_target`` when
         the dump fails.

     The write is reported through ``tracker.log_target`` on success and on
     failure, but only when ``auto_save_target_metrics`` is enabled in the
     task's core settings and a task run exists. A failure to report is logged.
     """
     # the task run doesn't always exist
     active_task_run = try_get_current_task_run()
     write_status = DbndTargetOperationStatus.OK
     try:
         parameter.dump_to_target(original_value, current_value)
     except Exception as ex:
         write_status = DbndTargetOperationStatus.NOK
         save_error = friendly_error.task_execution.failed_to_save_value_to_target(
             ex, self.task, parameter, original_value, current_value)
         raise save_error
     finally:
         # runs for both outcomes, so the tracker sees OK as well as NOK writes
         if self.task.settings.core.auto_save_target_metrics and active_task_run:
             try:
                 report = dict(
                     parameter=parameter,
                     target=original_value,
                     value=current_value,
                     operation_type=DbndTargetOperationType.write,
                     operation_status=write_status,
                 )
                 active_task_run.tracker.log_target(**report)
             except Exception as log_error:
                 logger.warning(
                     "Failed to log target to tracking store. %s", log_error)
 def task_with_set_external_resource_urls():
     """
     Register one sample external resource link and return the attempt uid
     of the current task run.
     """
     resource_links = {
         "my_resource": "http://some_resource_name.com/path/to/resource/123456789"
     }
     set_external_resource_urls(resource_links)
     return try_get_current_task_run().task_run_attempt_uid
Ejemplo n.º 8
0
def context_to_airflow_vars(context, in_env_var_format=False):
    """
    Wrap airflow's original ``context_to_airflow_vars`` and extend its result.

    The original implementation is called first. In env-var format, and when a
    dbnd task run is active, the result is passed through
    ``extend_airflow_ctx_with_dbnd_tracking_info``. The task instance try number
    is always added under ``AIRFLOW_CTX_TRY_NUMBER``.
    """
    # _original_context_to_airflow_vars is created during function override in patch_models()
    original_impl = airflow.utils.operator_helpers._original_context_to_airflow_vars
    airflow_vars = original_impl(
        context=context, in_env_var_format=in_env_var_format)

    # dbnd tracking info is only relevant when the vars are exported as env vars
    task_run = try_get_current_task_run() if in_env_var_format else None  # type: TaskRun
    if task_run:
        airflow_vars = extend_airflow_ctx_with_dbnd_tracking_info(
            task_run, airflow_vars)

    attempt_number = str(context['task_instance'].try_number)
    airflow_vars.update(AIRFLOW_CTX_TRY_NUMBER=attempt_number)
    return airflow_vars
Ejemplo n.º 9
0
def try_get_or_create_task_run():
    # type: ()-> Optional[TaskRun]
    """
    Return a task run to work with, trying these sources in order:

    1. the current task run, when there is one;
    2. a mock built from the attempt uid found in the
       ``DBND_TASK_RUN_ATTEMPT_UID`` environment variable;
    3. whatever ``try_get_inplace_task_run`` returns.
    """
    active_task_run = try_get_current_task_run()
    if active_task_run:
        return active_task_run

    # imported lazily, only when there is no current task run
    from dbnd._core.configuration.environ_config import DBND_TASK_RUN_ATTEMPT_UID

    attempt_uid = os.environ.get(DBND_TASK_RUN_ATTEMPT_UID)
    if attempt_uid:
        return _get_task_run_mock(attempt_uid)

    from dbnd._core.inplace_run.inplace_run_manager import try_get_inplace_task_run

    return try_get_inplace_task_run()
Ejemplo n.º 10
0
def try_get_or_create_task_run():
    # type: ()-> TaskRunTracker
    """
    Return a task run to work with, creating an on-demand one when needed.

    Sources are tried in this order:

    1. the current task run;
    2. a ``TaskRunMock`` for the attempt uid in ``DBND_TASK_RUN_ATTEMPT_UID``,
       with a ``TaskRunTracker`` attached to it;
    3. the operator task run of the airflow tracking manager, when an airflow
       context is found;
    4. the result of ``dbnd_run_start()`` when ``is_inplace_run()`` is true.

    Returns ``None`` when none of these applies or when anything fails during
    steps 2-4 (the failure is logged, not raised).

    NOTE(review): the type comment says ``TaskRunTracker``, but every branch
    returns a task-run-like object or ``None`` - confirm and fix the annotation.
    """
    existing_task_run = try_get_current_task_run()
    if existing_task_run:
        return existing_task_run

    try:
        from dbnd._core.task_run.task_run_tracker import TaskRunTracker
        from dbnd._core.configuration.environ_config import DBND_TASK_RUN_ATTEMPT_UID

        attempt_uid = os.environ.get(DBND_TASK_RUN_ATTEMPT_UID)
        if attempt_uid:
            mock_task_run = TaskRunMock(attempt_uid)
            from dbnd import config
            from dbnd._core.settings import CoreConfig

            # the tracking store is created with tracker_raise_on_error switched off
            tracking_overrides = {CoreConfig.tracker_raise_on_error: False}
            with config(tracking_overrides, source="ondemand_tracking"):
                store = CoreConfig().get_tracking_store()
                mock_task_run.tracker = TaskRunTracker(mock_task_run, store)
                return mock_task_run

        # let's check if we are in airflow env
        from dbnd._core.inplace_run.airflow_dag_inplace_tracking import (
            try_get_airflow_context, )

        airflow_ctx = try_get_airflow_context()
        if airflow_ctx:
            from dbnd._core.inplace_run.airflow_dag_inplace_tracking import (
                get_airflow_tracking_manager, )

            tracking_manager = get_airflow_tracking_manager(airflow_ctx)
            if tracking_manager:
                return tracking_manager.airflow_operator__task_run

        from dbnd._core.inplace_run.inplace_run_manager import is_inplace_run

        if is_inplace_run():
            return dbnd_run_start()

    except Exception:
        logger.info("Failed during dbnd inplace tracking init.", exc_info=True)
        return None

    # no source produced a task run
    return None
Ejemplo n.º 11
0
def bash_cmd(
    cmd=None,
    args=None,
    check_retcode=0,
    cwd=None,
    env=None,
    dbnd_env=True,
    output_encoding="utf-8",
    popen_kwargs=None,
    wait_for_termination_s=5,
    shell=False,
):
    # type:( str, List[str], Optional[int], str, Dict[str,str], bool, str, Dict[str,Any], int, bool) -> int
    """
    Run a command as a subprocess, stream its output to the log and return its exit code.

    :param cmd: command line as a single string (mutually exclusive with ``args``)
    :param args: command as a list of arguments (mutually exclusive with ``cmd``)
    :param check_retcode: expected return code; ``None`` disables the check
    :param cwd: working directory of the subprocess
    :param env: environment for the subprocess; never modified in place
    :param dbnd_env: when true and a ``DatabandRun`` instance exists, the run's
        spawn env vars are added on top of ``env`` (or of a copy of
        ``os.environ`` when ``env`` is empty)
    :param output_encoding: encoding used to decode the subprocess output lines
    :param popen_kwargs: extra keyword arguments for ``subprocess.Popen``
    :param wait_for_termination_s: how long to wait for the subprocess after
        terminating it on failure
    :param shell: run the command through the shell
    :return: the return code of the subprocess
    :raises DatabandConfigError: when both or neither of ``cmd``/``args`` are given
    :raises: the error built by ``failed_to_run_cmd`` when the return code
        differs from ``check_retcode``
    """
    if popen_kwargs is None:
        popen_kwargs = dict()
    popen_kwargs = popen_kwargs.copy()

    if cmd and args:
        raise DatabandConfigError("You should not provide cmd and args ")
    if not cmd and not args:
        # previously this surfaced later as an opaque TypeError while logging
        raise DatabandConfigError("You should provide cmd or args")

    if cmd:
        if shell:
            args = cmd
        else:
            args = shlex.split(cmd)
    elif args:
        args = list(map(str, args))
        cmd = list2cmdline(args)
        if shell:
            args = cmd

    logger.info("Running: %s", cmd)  # To simplify rerunning failing tests

    if dbnd_env and DatabandRun.has_instance():
        # work on a copy so that the caller's dict is never mutated
        env = dict(env) if env else os.environ.copy()
        dbnd_env_vars = DatabandRun.get_instance().get_context_spawn_env()
        logger.info(
            "Exporting the following env vars:\n%s",
            "\n".join("{}={}".format(k, v) for k, v in dbnd_env_vars.items()),
        )
        # bug fix: update() used to be called without arguments, so the
        # variables logged above were never actually exported
        env.update(dbnd_env_vars)

    def preexec_fn():
        if windows_compatible_mode:
            return
        # Restore default signal disposition and invoke setsid
        # (names missing from the signal module are skipped by the hasattr check)
        for sig in ("SIGPIPE", "SIGXFZ", "SIGXFSZ"):
            if hasattr(signal, sig):
                safe_signal(getattr(signal, sig), signal.SIG_DFL)
        os.setsid()

    process = subprocess.Popen(
        args,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        bufsize=-1,
        universal_newlines=True,
        env=env,
        # Popen rejects any non-None preexec_fn on Windows, and the hook is a
        # no-op in windows compatible mode anyway
        preexec_fn=None if windows_compatible_mode else preexec_fn,
        cwd=cwd,
        shell=shell,
        **popen_kwargs
    )

    try:
        task_run = try_get_current_task_run()
        if task_run:
            # expose the process on the task object
            task_run.task.process = process

        logger.info("Process is running, output:")
        # While command is running let read it's output
        output = []
        while True:
            line = process.stdout.readline()
            if line == "" or line == b"":
                break
            line = safe_decode(line, output_encoding).rstrip()

            logger.info("out: %s", line)
            # keep last 1000 lines only
            output.append(line)
            if len(output) > 1500:
                output = output[-1000:]

        returncode = process.wait()
        logger.info("Command exited with return code %s", process.returncode)
        if check_retcode is not None and returncode != check_retcode:
            raise failed_to_run_cmd("Bash command failed",
                                    cmd_str=cmd,
                                    return_code=returncode)
        return returncode
    except Exception:
        logger.info("Received interrupt. Terminating subprocess and waiting")
        try:
            process.terminate()
            process.wait(wait_for_termination_s)
        except Exception:
            # best effort cleanup: the original error is re-raised below
            pass
        raise
def request_builder(config_name):
    """
    Build the kube pod request for the k8s config named ``config_name``,
    using a pod created with a single ``"dummy"`` command.
    """
    kube_config = build_task_from_config(task_name=config_name)
    dummy_pod = kube_config.build_pod(
        task_run=try_get_current_task_run(),
        cmds=["dummy"],
    )
    return kube_config.build_kube_pod_req(dummy_pod)
def pod_builder(config_name):
    """
    Build a pod from the k8s config named ``config_name``, for the current
    task run and with a single ``"dummy"`` command.
    """
    # explicitly build config for k8s
    return build_task_from_config(task_name=config_name).build_pod(
        task_run=try_get_current_task_run(),
        cmds=["dummy"],
    )