Ejemplo n.º 1
0
def clean_jobs(folder=None):
    """Delete every entry found in the jobs folder.

    Regular files (and symlinks) are removed with ``os.remove`` and the DAG
    whose id is derived from the file path is removed via ``remove_dag``.
    Sub-directories are deleted recursively; no DAG is removed for them.

    Nothing happens when no folder can be resolved or when it is not an
    existing directory.

    :param folder: path to the jobs folder; when falsy, the value of the
                   ``jobs`` option from the ``cwl`` configuration section is
                   used instead
    :raises OSError: if an entry cannot be deleted
    """
    folder = folder if folder else conf_get_default('cwl', 'jobs', None)
    if not (folder and os.path.isdir(folder)):
        return

    logging.info("Cleaning jobs folder\n- {}".format(folder))
    for item in os.listdir(folder):
        path = os.path.join(folder, item)
        # Derive the id before the file disappears from disk.
        dag_id = gen_dag_id(path)

        # Decide explicitly instead of using a failed os.remove() as the
        # "this is a directory" signal: a broad `except OSError` would also
        # catch errors raised by remove_dag() and then call rmtree() on a
        # path that no longer exists, hiding the real failure.
        # Symlinks are always unlinked (rmtree refuses to work on them).
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path, ignore_errors=False)
            continue

        os.remove(path)
        logging.info("Remove job file\n- {}".format(path))
        remove_dag(dag_id)
Ejemplo n.º 2
0
def get_active_jobs(jobs_folder, limit=10):
    """
    Collect job files from a folder together with the state of their DAG run.

    Every ``.json``/``.yml``/``.yaml`` file returned by ``list_files`` is
    described by a dict with the keys ``path``, ``creation_date``,
    ``content``, ``dag_id`` and ``state``. ``state`` is taken from the first
    item returned by ``DagRun.find(dag_id)``, or ``State.NONE`` when there are
    no runs.

    Jobs are grouped by state in the order success, running, failed, none;
    inside each group they are ordered newest first (by
    ``os.path.getctime``) and the group is cut to ``limit`` items. Jobs in
    any other state are not returned.

    :param jobs_folder: abs path to the folder with job files
    :param limit: max number of jobs to return per state
    :return: list of job description dicts
    """
    collected = []
    for job_file in list_files(abs_path=jobs_folder, ext=[".json", ".yml", ".yaml"]):
        dag_id = gen_dag_id(job_file)
        runs = DagRun.find(dag_id)
        if len(runs) > 0:
            state = runs[0].state
        else:
            state = State.NONE
        collected.append({
            "path": job_file,
            "creation_date": datetime.fromtimestamp(os.path.getctime(job_file)),
            "content": load_job(job_file),
            "dag_id": dag_id,
            "state": state,
        })

    # One stable sort up front gives the same per-state ordering as sorting
    # each filtered group separately.
    newest_first = sorted(collected, key=lambda job: job["creation_date"], reverse=True)

    selected = []
    for wanted in (State.SUCCESS, State.RUNNING, State.FAILED, State.NONE):
        matching = [job for job in newest_first if job["state"] == wanted]
        selected.extend(matching[:limit])
    return selected
Ejemplo n.º 3
0
def add_run_info(args):
    """Extend ``args`` in place with the settings needed to run a job's DAG.

    The attributes of the object returned by
    ``get_airflow_default_args("scheduler")`` are copied onto ``args``
    (overwriting attributes with the same name), then ``dag_id`` and
    ``num_runs`` are set.

    :param args: namespace-like object; its ``job`` attribute is read as the
                 path of the job file
    """
    scheduler_defaults = get_airflow_default_args("scheduler")
    vars(args).update(vars(scheduler_defaults))

    # The DAG id is derived from where the job file lives inside the
    # configured jobs folder, not from the path given in ``args.job``.
    jobs_dir = conf.get('cwl', 'jobs')
    job_file_name = os.path.basename(args.job)
    args.dag_id = gen_dag_id(os.path.join(jobs_dir, job_file_name))

    # NOTE(review): the extra 3 runs on top of the task count look like a
    # safety margin for the scheduler loop -- confirm the intent.
    task_count = len(get_dag(args).tasks)
    args.num_runs = task_count + 3