import logging
import os
import shutil
from datetime import datetime

from airflow.models import DagRun
from airflow.utils.state import State

# Package-local helpers (conf, conf_get_default, gen_dag_id, remove_dag,
# list_files, load_job, get_airflow_default_args, get_dag) are assumed to be
# importable from this module's package.


def clean_jobs(folder=None):
    """Remove every job file from the jobs folder and delete its DAG."""
    folder = folder if folder else conf_get_default('cwl', 'jobs', None)
    if folder and os.path.isdir(folder):
        logging.info("Cleaning jobs folder\n- {}".format(folder))
        for item in os.listdir(folder):
            path = os.path.join(folder, item)
            dag_id = gen_dag_id(path)
            try:
                os.remove(path)
                logging.info("Remove job file\n- {}".format(path))
                remove_dag(dag_id)
            except OSError:
                # os.remove fails on directories; fall back to removing the tree
                shutil.rmtree(path, ignore_errors=False)
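
# A minimal usage sketch (hypothetical folder path; assumes the Airflow config
# exposes a [cwl] section with a "jobs" option for the default-folder fallback):
#
#   clean_jobs("/tmp/cwl/jobs")   # clean an explicit folder
#   clean_jobs()                  # fall back to conf_get_default('cwl', 'jobs', None)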
def get_active_jobs(jobs_folder, limit=10):
    """
    :param jobs_folder: abs path to the folder with job files (.json/.yml/.yaml)
    :param limit: max number of jobs to return per state
    :return: list of job records grouped by state (success, running, failed, unknown)
    """
    all_jobs = []
    for job_path in list_files(abs_path=jobs_folder, ext=[".json", ".yml", ".yaml"]):
        dag_id = gen_dag_id(job_path)
        dag_runs = DagRun.find(dag_id)
        all_jobs.append({
            "path": job_path,
            "creation_date": datetime.fromtimestamp(os.path.getctime(job_path)),
            "content": load_job(job_path),
            "dag_id": dag_id,
            # state of the first found DagRun; NONE if the DAG has never run
            "state": dag_runs[0].state if len(dag_runs) > 0 else State.NONE
        })

    def newest(state):
        # newest-first within a state, capped at `limit`
        jobs = [j for j in all_jobs if j["state"] == state]
        return sorted(jobs, key=lambda k: k["creation_date"], reverse=True)[:limit]

    return (newest(State.SUCCESS) + newest(State.RUNNING)
            + newest(State.FAILED) + newest(State.NONE))
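
# A minimal usage sketch (hypothetical folder path; assumes the folder holds
# .json/.yml/.yaml job files and that gen_dag_id maps each file to its DAG id):
#
#   for job in get_active_jobs("/tmp/cwl/jobs", limit=5):
#       print(job["dag_id"], job["state"], job["creation_date"])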
def add_run_info(args):
    # Merge the scheduler's default arguments into the parsed CLI args
    vars(args).update(vars(get_airflow_default_args("scheduler")))
    args.dag_id = gen_dag_id(
        os.path.join(conf.get('cwl', 'jobs'), os.path.basename(args.job)))
    # Number of scheduler runs: one per task plus a few extra iterations
    args.num_runs = len(get_dag(args).tasks) + 3
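
# A minimal usage sketch (hypothetical job file name; assumes args carries at
# least a "job" attribute, e.g. from argparse, and that get_dag(args) can load
# the corresponding DAG):
#
#   import argparse
#   args = argparse.Namespace(job="my_job.json")
#   add_run_info(args)
#   print(args.dag_id, args.num_runs)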