def auto_save_param(self, parameter, original_value, current_value):
    # type: (ParameterDefinition, Any, Any) -> None
    """
    Dump ``current_value`` into the ``original_value`` target and report the write.

    The write status (OK / NOK) is sent to the tracker of the current task
    run, when there is one, whether or not the dump succeeded. A failure while
    reporting is only logged as a warning.

    :param parameter: definition of the output parameter being saved
    :param original_value: target the value is written to
    :param current_value: value to write
    :raises: the error built by
        ``friendly_error.task_execution.failed_to_save_value_to_target`` when
        saving fails
    """
    # It's an output, so it gets saved; a task run doesn't always exist.
    active_task_run = try_get_current_task_run()
    write_status = DbndTargetOperationStatus.OK
    try:
        # Workaround: we don't want to change the parameter for outputs
        # dynamically, but a proper value type is needed to "dump" the preview
        # and other meta. For now it is updated for in-memory targets only.
        if isinstance(original_value, InMemoryTarget):
            detected_type = get_value_type_of_obj(
                current_value, parameter.value_type
            )
            parameter.value_type = detected_type
        parameter.dump_to_target(original_value, current_value)
    except Exception as dump_error:
        write_status = DbndTargetOperationStatus.NOK
        raise friendly_error.task_execution.failed_to_save_value_to_target(
            dump_error, self.task, parameter, original_value, current_value
        )
    finally:
        # No early return here: returning from `finally` would swallow the
        # exception raised above.
        if active_task_run:
            tracking_payload = dict(
                parameter=parameter,
                target=original_value,
                value=current_value,
                operation_type=DbndTargetOperationType.write,
                operation_status=write_status,
            )
            try:
                active_task_run.tracker.log_parameter_data(**tracking_payload)
            except Exception as tracking_error:
                logger.warning(
                    "Failed to log target to tracking store. %s", tracking_error
                )
def bash_script(
    script=None,
    check_retcode=0,
    cwd=None,
    env=None,
    dbnd_env=True,
    output_encoding="utf-8",
    popen_kwargs=None,
):
    # type:( str, Optional[int],str, Dict[str,str], bool, str, Dict[str,Any]) -> int
    """
    Write ``script`` to a ``bash_cmd.sh`` file and run it with ``bash``.

    The file is created in the local root of the current task run. When there
    is no task run, a fresh temporary folder is used instead. That folder also
    becomes the working directory unless ``cwd`` is given.

    :param script: text of the bash script to run
    :param check_retcode: forwarded to ``bash_cmd``
    :param cwd: working directory; defaults to the folder holding the script
    :param env: forwarded to ``bash_cmd``
    :param dbnd_env: forwarded to ``bash_cmd``
    :param output_encoding: forwarded to ``bash_cmd``
    :param popen_kwargs: forwarded to ``bash_cmd``
    :return: whatever ``bash_cmd`` returns
    :raises TypeError: when ``script`` is None
    """
    if script is None:
        # Fail before touching the filesystem instead of leaving an empty file.
        raise TypeError("bash_script requires a `script` string, got None")

    # we need a working folder to create bash script
    task_run = try_get_current_task_run()
    if task_run:
        script_dir = str(task_run.local_task_run_root)
    else:
        # No task run to host the script: os.path.join(None, ...) would raise,
        # so fall back to a new temporary folder. It is left in place so the
        # logged script location stays valid after the run.
        import tempfile

        script_dir = tempfile.mkdtemp(prefix="dbnd_bash_script_")

    bash_script_path = os.path.join(script_dir, "bash_cmd.sh")
    with open(bash_script_path, "wb") as bs:
        bs.write(bytes(script, "utf_8"))

    log_metric("bash_script", bash_script_path)
    logger.info("Bash script location: %s", bash_script_path)

    args = ["bash", bash_script_path]
    return bash_cmd.func(
        args=args,
        check_retcode=check_retcode,
        cwd=cwd or script_dir,
        env=env,
        dbnd_env=dbnd_env,
        output_encoding=output_encoding,
        popen_kwargs=popen_kwargs,
    )
def set_external_resource_urls(links):
    # type: (dict[str, str]) -> None
    """
    Attach `links` as external resources to the currently running task.

    Invalid input, a missing task run, or an error raised while setting the
    links is logged as a warning rather than raised.

    @param links: map between the name of the resource and the link to the resource
    """
    if not isinstance(links, dict):
        logger.warning(
            "Failed to add links as external resources: links is not a dict"
        )
        return

    # Drop entries whose link is empty / None.
    usable_links = {}
    for resource_name, resource_url in six.iteritems(links):
        if resource_url:
            usable_links[resource_name] = resource_url

    if not usable_links:
        logger.warning(
            "Failed to add links as external resources: links is empty dict or dict with None values "
        )
        return

    current_task_run = try_get_current_task_run()
    if current_task_run is None:
        logger.warning(
            "Failed to add links as external resources: There is no running task"
        )
        return

    try:
        current_task_run.set_external_resource_urls(usable_links)
    except Exception as error:
        message = "Failed to add links as external resources: raised {e}".format(
            e=error
        )
        logger.warning(message)
def from_env(cls, current_run):
    # type: (DatabandRun) -> RootRunInfo
    """
    Resolve the root run info for ``current_run``.

    When a databand run is already available, its root run info is reused,
    with ``root_task_run_uid`` replaced by the uid of the current task run if
    there is one. Otherwise the values are read from the environment, and
    ``current_run`` itself is used as the root when no root run uid is set.
    """
    parent_run = try_get_databand_run()
    if parent_run:
        # take from parent
        inherited_info = parent_run.root_run_info

        # update parent run info if required
        current_task_run = try_get_current_task_run()
        if not current_task_run:
            return inherited_info
        return attr.evolve(
            inherited_info, root_task_run_uid=current_task_run.task_run_uid
        )

    # take from env
    environ = os.environ
    run_uid = environ.get(DBND_ROOT_RUN_UID)
    run_url = environ.get(DBND_ROOT_RUN_TRACKER_URL)
    parent_task_run_uid = environ.get(DBND_PARENT_TASK_RUN_UID)

    if not run_uid:
        # current run is the main run
        run_uid = current_run.run_uid
        run_url = current_run.tracker.run_url

    return cls(
        root_run_uid=run_uid,
        root_run_url=run_url,
        root_task_run_uid=parent_task_run_uid,
    )
def heartbeat_manual(log=False, check_time=datetime.datetime.now()):
    """
    Run this module's file as a child Python process and wait for it to exit.

    :param log: when true, the child's stdout and stderr are written to
        ``hearbeat_manual.log`` under the local root of the current run;
        otherwise no stdout redirection is set up for the child.
    :param check_time: not used by this function.
        NOTE(review): the default is evaluated once, when the function is
        defined, not on every call.
    """
    import logging

    logger = logging.getLogger(__name__)

    # __file__ may name the compiled module; run the matching ".py" source.
    # (Replacing ".pyc" with "py" dropped the dot and produced a bad path.)
    script_path = __file__
    if script_path.endswith(".pyc"):
        script_path = script_path[:-1]
    cmd = [sys.executable, script_path]
    cmd_line = subprocess.list2cmdline(cmd)

    heartbeat_log_fp = None
    try:
        if log:
            run = try_get_current_task_run().run
            heartbeat_log_file = run.run_local_root.partition(
                name="hearbeat_manual.log"
            )
            heartbeat_log_fp = heartbeat_log_file.open("w")
            logger.error(
                "Starting heartbeat with log at %s using cmd: %s",
                heartbeat_log_file,
                cmd_line,
            )
        else:
            logger.error("Starting MANUAL using cmd: %s", cmd_line)

        sp = subprocess.Popen(
            cmd, stdout=heartbeat_log_fp, stderr=subprocess.STDOUT
        )
        sleep(10)
        exit_code = sp.wait()
    finally:
        # The log handle used to be leaked; close it once the child is done
        # (or when starting it failed).
        if heartbeat_log_fp is not None:
            heartbeat_log_fp.close()

    if exit_code == 0:
        logger.error(" MANUAL finished with 0")
    else:
        logger.error(" MANUAL finished with %s", exit_code)
def auto_save_param(self, parameter, original_value, current_value):
    # type: (ParameterDefinition, Any, Any) -> None
    """
    Dump ``current_value`` into the ``original_value`` target and report the write.

    The write status (OK / NOK) is logged to the tracker of the current task
    run when the task's ``auto_save_target_metrics`` core setting is on and a
    task run exists. A failure while reporting is only logged as a warning.

    :param parameter: definition of the output parameter being saved
    :param original_value: target the value is written to
    :param current_value: value to write
    :raises: the error built by
        ``friendly_error.task_execution.failed_to_save_value_to_target`` when
        saving fails
    """
    # It's an output, so it gets saved; a task run doesn't always exist.
    active_task_run = try_get_current_task_run()
    write_status = DbndTargetOperationStatus.OK
    try:
        parameter.dump_to_target(original_value, current_value)
    except Exception as dump_error:
        write_status = DbndTargetOperationStatus.NOK
        raise friendly_error.task_execution.failed_to_save_value_to_target(
            dump_error, self.task, parameter, original_value, current_value
        )
    finally:
        # No early return here: returning from `finally` would swallow the
        # exception raised above.
        if self.task.settings.core.auto_save_target_metrics and active_task_run:
            tracking_payload = dict(
                parameter=parameter,
                target=original_value,
                value=current_value,
                operation_type=DbndTargetOperationType.write,
                operation_status=write_status,
            )
            try:
                active_task_run.tracker.log_target(**tracking_payload)
            except Exception as tracking_error:
                logger.warning(
                    "Failed to log target to tracking store. %s", tracking_error
                )
def task_with_set_external_resource_urls():
    """Register one external resource link, then return the current task run's attempt uid."""
    resource_links = {
        "my_resource": "http://some_resource_name.com/path/to/resource/123456789"
    }
    set_external_resource_urls(resource_links)
    return try_get_current_task_run().task_run_attempt_uid
def context_to_airflow_vars(context, in_env_var_format=False):
    """
    Build the airflow context variables and add dbnd specific entries.

    Starts from the result of airflow's original implementation. In env-var
    format, the result is extended with dbnd tracking info when a task run is
    active. The try number of the task instance is always added under
    ``AIRFLOW_CTX_TRY_NUMBER``.
    """
    # _original_context_to_airflow_vars is created during function override in patch_models()
    airflow_vars = airflow.utils.operator_helpers._original_context_to_airflow_vars(
        context=context, in_env_var_format=in_env_var_format
    )

    # The task run is only looked up when env-var format is requested.
    task_run = (
        try_get_current_task_run() if in_env_var_format else None
    )  # type: TaskRun
    if task_run:
        airflow_vars = extend_airflow_ctx_with_dbnd_tracking_info(
            task_run, airflow_vars
        )

    attempt_number = str(context['task_instance'].try_number)
    airflow_vars.update({"AIRFLOW_CTX_TRY_NUMBER": attempt_number})
    return airflow_vars
def try_get_or_create_task_run():
    # type: ()-> Optional[TaskRun]
    """
    Return a task run, trying the available sources in order.

    1. the current task run, when there is one;
    2. the result of ``_get_task_run_mock`` for the attempt uid found in the
       environment variable named by ``DBND_TASK_RUN_ATTEMPT_UID``;
    3. whatever ``try_get_inplace_task_run`` returns.
    """
    current = try_get_current_task_run()
    if current:
        return current

    from dbnd._core.configuration.environ_config import DBND_TASK_RUN_ATTEMPT_UID

    attempt_uid = os.environ.get(DBND_TASK_RUN_ATTEMPT_UID)
    if not attempt_uid:
        from dbnd._core.inplace_run.inplace_run_manager import try_get_inplace_task_run

        return try_get_inplace_task_run()

    return _get_task_run_mock(attempt_uid)
def try_get_or_create_task_run():
    # type: () -> Optional[TaskRun]
    """
    Return a task run to track against, creating one on demand if possible.

    Sources are tried in this order:

    1. the current task run;
    2. a ``TaskRunMock`` for the attempt uid found in the environment variable
       named by ``DBND_TASK_RUN_ATTEMPT_UID``, given a ``TaskRunTracker``
       built on the configured tracking store;
    3. the task run held by the airflow tracking manager, when an airflow
       context is found;
    4. the result of ``dbnd_run_start()`` when ``is_inplace_run()`` is true.

    Any exception raised while trying sources 2-4 is logged at info level and
    ``None`` is returned. ``None`` is also returned when no source applies.
    """
    task_run = try_get_current_task_run()
    if task_run:
        return task_run

    try:
        # Imports are done lazily, inside the try, so that an import failure
        # is handled like any other init failure below.
        from dbnd._core.task_run.task_run_tracker import TaskRunTracker
        from dbnd._core.configuration.environ_config import DBND_TASK_RUN_ATTEMPT_UID

        tra_uid = os.environ.get(DBND_TASK_RUN_ATTEMPT_UID)
        if tra_uid:
            task_run = TaskRunMock(tra_uid)
            from dbnd import config
            from dbnd._core.settings import CoreConfig

            # The tracking store is created with tracker_raise_on_error
            # switched off for this on-demand tracking.
            with config({CoreConfig.tracker_raise_on_error: False}, source="ondemand_tracking"):
                tracking_store = CoreConfig().get_tracking_store()
                trt = TaskRunTracker(task_run, tracking_store)
                task_run.tracker = trt
                return task_run

        # let's check if we are in airflow env
        from dbnd._core.inplace_run.airflow_dag_inplace_tracking import (
            try_get_airflow_context,
        )

        airflow_context = try_get_airflow_context()
        if airflow_context:
            from dbnd._core.inplace_run.airflow_dag_inplace_tracking import (
                get_airflow_tracking_manager,
            )

            atm = get_airflow_tracking_manager(airflow_context)
            if atm:
                return atm.airflow_operator__task_run

        from dbnd._core.inplace_run.inplace_run_manager import is_inplace_run

        if is_inplace_run():
            return dbnd_run_start()
    except Exception:
        # Best effort: tracking init problems must not break the caller.
        logger.info("Failed during dbnd inplace tracking init.", exc_info=True)
        return None
def bash_cmd(
    cmd=None,
    args=None,
    check_retcode=0,
    cwd=None,
    env=None,
    dbnd_env=True,
    output_encoding="utf-8",
    popen_kwargs=None,
    wait_for_termination_s=5,
    shell=False,
):
    # type:( str, List[str], Optional[int], str, Dict[str,str], bool, str, Dict[str,Any], int, bool) -> int
    """
    Run a command as a subprocess, stream its output to the log and return its exit code.

    Exactly one of ``cmd`` / ``args`` must be given.

    :param cmd: command line as a single string; split with ``shlex`` unless
        ``shell`` is true
    :param args: command as a list of arguments; every item is converted to str
    :param check_retcode: expected exit code; ``None`` disables the check
    :param cwd: working directory of the subprocess
    :param env: environment of the subprocess
    :param dbnd_env: when true and a ``DatabandRun`` instance exists, the env
        vars from its ``get_context_spawn_env()`` are added to the subprocess
        environment (based on a copy of ``env``, or of ``os.environ`` when
        ``env`` is empty)
    :param output_encoding: encoding passed to ``safe_decode`` for each output line
    :param popen_kwargs: extra keyword arguments for ``subprocess.Popen``
    :param wait_for_termination_s: how long to wait for the subprocess after
        terminating it on failure
    :param shell: run the command line through the shell
    :return: exit code of the subprocess
    :raises DatabandConfigError: when both or neither of ``cmd`` / ``args`` are given
    :raises: the error built by ``failed_to_run_cmd`` when the exit code
        differs from ``check_retcode``
    """
    popen_kwargs = {} if popen_kwargs is None else popen_kwargs.copy()

    if cmd and args:
        raise DatabandConfigError("You should not provide cmd and args ")
    if not cmd and not args:
        # Without this the function failed later with an unrelated TypeError.
        raise DatabandConfigError("You should provide cmd or args")

    if cmd:
        args = cmd if shell else shlex.split(cmd)
    else:
        args = list(map(str, args))
        cmd = list2cmdline(args)
        if shell:
            args = cmd

    # To simplify rerunning failing tests
    logger.info("Running: %s", cmd)

    if dbnd_env and DatabandRun.has_instance():
        # Work on a copy so the caller's mapping is not modified.
        env = dict(env) if env else os.environ.copy()
        dbnd_env_vars = DatabandRun.get_instance().get_context_spawn_env()
        logger.info(
            "Exporting the following env vars:\n%s",
            "\n".join(["{}={}".format(k, v) for k, v in dbnd_env_vars.items()]),
        )
        # The vars were logged as exported but never applied: update() was
        # called without arguments.
        env.update(dbnd_env_vars)

    def preexec_fn():
        if windows_compatible_mode:
            return
        # Restore default signal disposition and invoke setsid
        for sig in ("SIGPIPE", "SIGXFZ", "SIGXFSZ"):
            if hasattr(signal, sig):
                safe_signal(getattr(signal, sig), signal.SIG_DFL)
        os.setsid()

    process = subprocess.Popen(
        args,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        bufsize=-1,
        universal_newlines=True,
        env=env,
        preexec_fn=preexec_fn,
        cwd=cwd,
        shell=shell,
        **popen_kwargs
    )

    try:
        task_run = try_get_current_task_run()
        if task_run:
            task_run.task.process = process

        logger.info("Process is running, output:")
        # While the command is running, read its output line by line
        while True:
            line = process.stdout.readline()
            if line == "" or line == b"":
                break
            line = safe_decode(line, output_encoding).rstrip()
            logger.info("out: %s", line)

        returncode = process.wait()
        logger.info("Command exited with return code %s", process.returncode)

        if check_retcode is not None and returncode != check_retcode:
            raise failed_to_run_cmd(
                "Bash command failed", cmd_str=cmd, return_code=returncode
            )
        return returncode
    except Exception:
        # Any failure above, including the return code check, ends up here:
        # stop the subprocess (best effort) and re-raise the original error.
        logger.info("Received interrupt. Terminating subprocess and waiting")
        try:
            process.terminate()
            process.wait(wait_for_termination_s)
        except Exception:
            pass
        raise
def request_builder(config_name):
    """Build the kube pod request for a dummy-command pod of the named config."""
    kube_config = build_task_from_config(task_name=config_name)
    dummy_pod = kube_config.build_pod(
        task_run=try_get_current_task_run(), cmds=["dummy"]
    )
    pod_request = kube_config.build_kube_pod_req(dummy_pod)
    return pod_request
def pod_builder(config_name):
    """Build a dummy-command pod from the named config, using the current task run."""
    # explicitly build config for k8s
    kube_config = build_task_from_config(task_name=config_name)
    return kube_config.build_pod(
        task_run=try_get_current_task_run(), cmds=["dummy"]
    )