def stop_tasks(self, dr):
    logging.debug(f"Stop tasks for {dr.dag_id} - {dr.run_id}")
    for ti in dr.get_task_instances():
        logging.debug(
            f"process {ti.dag_id} - {ti.task_id} - {ti.execution_date} - {ti.pid}"
        )
        if ti.state == State.RUNNING:
            try:
                process = psutil.Process(ti.pid) if ti.pid else None
            except Exception:
                logging.debug(f" - cannot find process by PID {ti.pid}")
                process = None
            ti.set_state(State.FAILED)
            logging.debug(" - set state to FAILED")
            if process:
                logging.debug(f" - wait for process {ti.pid} to exit")
                try:
                    cleanup_timeout = int(
                        conf_get("core", "KILLED_TASK_CLEANUP_TIME", 60)
                    ) * 2
                    process.wait(
                        timeout=cleanup_timeout
                    )  # raises psutil.TimeoutExpired on timeout, which makes the task fail -> DagRun fails
                except psutil.TimeoutExpired:
                    logging.debug(f" - Done waiting for process {ti.pid} to die")
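# Illustrative sketch (not part of the original module): one way stop_tasks() might be
# invoked from cleanup code that aborts a DagRun. "dag" is assumed to be an
# airflow.models.DAG instance and "self" any object exposing stop_tasks(); the calling
# context is hypothetical.
#
#   dag_run = dag.get_last_dagrun(include_externally_triggered=True)
#   if dag_run is not None:
#       self.stop_tasks(dag_run)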
def wes_collect_attachments(self, run_id):
    tempdir = tempfile.mkdtemp(
        dir=get_dir(
            path.abspath(
                conf_get("cwl", "tmp_folder", path.join(AIRFLOW_HOME, "cwl_tmp_folder"))
            )
        ),
        prefix="run_id_" + run_id + "_"
    )
    logging.debug(f"Save all attached files to {tempdir}")
    for k, ls in iterlists(connexion.request.files):
        logging.debug(f"Process attachment parameter {k}")
        if k == "workflow_attachment":
            for v in ls:
                try:
                    logging.debug(f"Process attached file {v}")
                    # sanitize the relative path: drop empty, "." and ".." components
                    # so the attachment stays inside tempdir
                    sp = v.filename.split("/")
                    fn = []
                    for p in sp:
                        if p not in ("", ".", ".."):
                            fn.append(secure_filename(p))
                    dest = path.join(tempdir, *fn)
                    if not path.isdir(path.dirname(dest)):
                        get_dir(path.dirname(dest))
                    logging.debug(f"Save {v.filename} to {dest}")
                    v.save(dest)
                except Exception as err:
                    raise ValueError(f"Failed to process attached file {v}, {err}")
    body = {}
    for k, ls in iterlists(connexion.request.form):
        logging.debug(f"Process form parameter {k}")
        for v in ls:
            try:
                if not v:
                    continue
                if k == "workflow_params":
                    # re-serialize the submitted job parameters and resolve them
                    # with the cwltool job loader
                    job_file = path.join(tempdir, "job.json")
                    with open(job_file, "w") as f:
                        json.dump(json.loads(v), f, indent=4)
                    logging.debug(f"Save job file to {job_file}")
                    loader = Loader(load.jobloaderctx.copy())
                    job_order_object, _ = loader.resolve_ref(job_file, checklinks=False)
                    body[k] = job_order_object
                else:
                    body[k] = v
            except Exception as err:
                raise ValueError(f"Failed to process form parameter {k}, {v}, {err}")
    if "workflow_params" not in body or "workflow_url" not in body:
        raise ValueError("Missing 'workflow_params' or 'workflow_url' in submission")
    body["workflow_url"] = path.join(tempdir, secure_filename(body["workflow_url"]))
    return tempdir, body
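# Client-side sketch (illustrative only, not part of this module): the multipart form
# that wes_collect_attachments() expects, sent with the "requests" library. The endpoint
# URL, file names, and job parameters below are assumptions.
#
#   import json
#   import requests
#   with open("workflow.cwl", "rb") as wf:
#       response = requests.post(
#           "http://localhost:8081/wes/v1/runs",             # hypothetical WES endpoint
#           data={
#               "workflow_params": json.dumps({"message": "hello"}),
#               "workflow_url": "workflow.cwl",              # resolved relative to tempdir
#           },
#           files=[("workflow_attachment", ("workflow.cwl", wf))],
#       )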
def setup_cwl_logger(ti, level=None):
    """
    Sets the logging level of the cwltool logger to correspond to LOGGING_LEVEL from airflow.cfg.
    Configures handlers based on the task instance to redirect output to the proper file.
    Suppresses those loggers from cwltool or related packages that spam.
    Note: we may need to remove the StreamHandler <stderr> handler from the cwltool logger
    if we see undesired output in the airflow logs but not in the separate files.
    """
    level = conf_get("core", "LOGGING_LEVEL", "INFO").upper() if level is None else level
    cwl_logger = logging.getLogger("cwltool")
    for handler in cwl_logger.handlers:
        try:
            handler.set_context(ti)
        except AttributeError:
            pass
    cwl_logger.setLevel(level)
    less_verbose(loggers=["rdflib.term", "salad", "requests", "urllib3"], level="ERROR")
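# Usage sketch (illustrative): an operator's execute() could call setup_cwl_logger()
# with the task instance from the Airflow context before invoking cwltool, so cwltool
# output is routed into that task's log file. "context" is the standard execute()
# argument; the operator class itself is hypothetical.
#
#   def execute(self, context):
#       setup_cwl_logger(context["ti"])
#       ...  # run cwltool for this step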