Example #1
0
def clean_jobs_folder(folder=None):
    """Empty the jobs folder by deleting everything directly inside it.

    Args:
        folder: path to the jobs folder; when falsy, the configured
            'cwl'/'jobs' value is looked up instead. A missing or
            non-directory path is silently ignored.
    """
    target_folder = folder or conf_get_default('cwl', 'jobs', None)
    if not target_folder or not os.path.isdir(target_folder):
        return
    logging.info("Cleaning jobs folder\n- {}".format(target_folder))
    for name in os.listdir(target_folder):
        entry = os.path.join(target_folder, name)
        try:
            # Plain files and symlinks are removed directly; a directory
            # makes os.remove raise OSError and falls through to rmtree.
            os.remove(entry)
        except OSError:
            shutil.rmtree(entry, ignore_errors=False)
Example #2
0
def update_config(args):
    logging.info("Update Airflow configuration")
    with open(conf.AIRFLOW_CONFIG, 'w') as output_stream:
        try:
            conf.conf.add_section('cwl')
        except configparser.DuplicateSectionError:
            pass
        conf.set('core', 'dags_are_paused_at_creation', 'False')
        conf.set('core', 'load_examples', 'False')
        conf.set('cwl', 'jobs', str(args.jobs))
        conf.set('cwl', 'limit', str(args.limit))
        conf.set('cwl', 'logging_level', conf_get_default('cwl', 'logging_level', 'ERROR'))  # To supress all useless output from cwltool's functions
        conf.set('core', 'dagbag_import_timeout', str(args.dag_timeout))
        conf.set('scheduler', 'max_threads', str(args.threads))
        conf.set('webserver', 'worker_refresh_interval', str(args.web_interval))
        conf.set('webserver', 'worker_refresh_batch_size', str(args.web_workers))
        conf.set('webserver', 'hide_paused_dags_by_default', 'True')
        conf.conf.write(output_stream)
Example #3
0
def arg_parser():
    """Build the cwl-airflow command-line parser.

    Returns:
        argparse.ArgumentParser with three sub-commands -- 'init',
        'submit' and 'demo' -- each wired to its handler function via
        ``set_defaults(func=...)``. Option defaults are read from the
        current Airflow configuration through ``conf_get_default``.
    """
    # Shared (currently empty) parent so common options can be added later.
    parent_parser = argparse.ArgumentParser(add_help=False)
    general_parser = argparse.ArgumentParser(description='cwl-airflow')

    general_parser.add_argument(
        "-q",
        "--quiet",
        dest='quiet',
        action="store_true",
        help="Suppress all output except warnings and errors")

    subparsers = general_parser.add_subparsers()
    subparsers.required = True  # a sub-command must always be supplied

    _add_init_parser(subparsers, parent_parser)
    _add_submit_parser(subparsers, parent_parser)
    _add_demo_parser(subparsers, parent_parser)

    return general_parser


def _add_init_parser(subparsers, parent_parser):
    """Register the 'init' sub-command (configure cwl-airflow)."""
    init_parser = subparsers.add_parser('init',
                                        help="Init cwl-airflow",
                                        parents=[parent_parser])
    init_parser.set_defaults(func=run_init)
    init_parser.add_argument("-l",
                             "--limit",
                             dest='limit',
                             type=int,
                             help="Limit job concurrency",  # fixed typo: was "concurrancy"
                             default=conf_get_default("cwl", "limit", 20))
    init_parser.add_argument("-j",
                             "--jobs",
                             dest='jobs',
                             type=str,
                             help="Jobs folder. Default: ~/airflow/jobs",
                             default=conf_get_default(
                                 "cwl", "jobs",
                                 os.path.join(AIRFLOW_HOME, 'jobs')))
    init_parser.add_argument(
        "-t",
        "--timeout",
        dest='dag_timeout',
        type=int,
        help=
        "How long before timing out a python file import while filling the DagBag",
        default=conf_get_default("core", "dagbag_import_timeout", 30))
    init_parser.add_argument(
        "-r",
        "--refresh",
        dest='web_interval',
        type=int,
        help="Webserver workers refresh interval, seconds",
        default=conf_get_default("webserver", "worker_refresh_interval", 30))
    init_parser.add_argument("-w",
                             "--workers",
                             dest='web_workers',
                             type=int,
                             help="Webserver workers refresh batch size",
                             default=conf_get_default(
                                 "webserver", "worker_refresh_batch_size", 1))
    init_parser.add_argument("-p",
                             "--threads",
                             dest='threads',
                             type=int,
                             help="Max Airflow Scheduler threads",
                             default=conf_get_default("scheduler",
                                                      "max_threads", 2))


def _add_submit_parser(subparsers, parent_parser):
    """Register the 'submit' sub-command (submit a custom workflow)."""
    submit_parser = subparsers.add_parser('submit',
                                          help="Submit custom workflow",
                                          parents=[parent_parser])
    submit_parser.set_defaults(func=submit_job)
    submit_parser.add_argument("-o",
                               "--outdir",
                               dest='output_folder',
                               type=str,
                               help="Output directory. Default: ./",
                               default=".")
    submit_parser.add_argument(
        "-t",
        "--tmp",
        dest='tmp_folder',
        type=str,
        help="Folder to store temporary data. Default: /tmp")
    submit_parser.add_argument(
        "-u",
        "--uid",
        dest='uid',
        type=str,
        help="Experiment unique ID. Default: random uuid",
        # NOTE: default uuid is generated once, at parser-construction time.
        default=str(uuid.uuid4()))
    submit_parser.add_argument("-r",
                               "--run",
                               dest='run',
                               action="store_true",
                               help="Run workflow with Airflow Scheduler")
    submit_parser.add_argument("workflow", type=str, help="Workflow file path")
    submit_parser.add_argument("job", type=str, help="Job file path")


def _add_demo_parser(subparsers, parent_parser):
    """Register the 'demo' sub-command (run bundled demo workflows)."""
    demo_parser = subparsers.add_parser('demo',
                                        help="Run demo workflows",
                                        parents=[parent_parser])
    demo_parser.set_defaults(func=run_demo)
    demo_parser.add_argument("-o",
                             "--outdir",
                             dest='output_folder',
                             type=str,
                             help="Output directory. Default: ./",
                             default=".")
    demo_parser.add_argument(
        "-t",
        "--tmp",
        dest='tmp_folder',
        type=str,
        help="Folder to store temporary data. Default: /tmp")
    demo_parser.add_argument(
        "-u",
        "--uid",
        dest='uid',
        type=str,
        help=
        "Experiment's unique ID; ignored with -a/-l arguments. Default: random uuid",
        default=str(uuid.uuid4()))
    demo_parser.add_argument("workflow",
                             type=str,
                             help="Demo workflow name from the list")

    # -a / -m / -l select mutually exclusive run modes.
    excl_group = demo_parser.add_mutually_exclusive_group()
    excl_group.add_argument(
        "-a",
        "--auto",
        dest='auto',
        action="store_true",
        help="Run all demo workflows with Airflow Webserver & Scheduler")
    excl_group.add_argument(
        "-m",
        "--manual",
        dest='manual',
        action="store_true",
        help=
        "Submit all demo workflows. Requires Airflow Webserver & Scheduler to be run separately"
    )
    excl_group.add_argument("-l",
                            "--list",
                            dest='list',
                            action="store_true",
                            help="List demo workflows")