def register_dags(configs_path):
    """
    Create airflow DAGs for the pipelines of liminal yml files found in given path (recursively).

    Configs are loaded through ``ConfigUtil(configs_path).safe_load`` with
    variable rendering enabled. Unless the ``POD_NAMESPACE`` environment
    variable equals ``jenkins``, ``snapshot_final_liminal_configs`` is called
    on the same ``ConfigUtil`` right after loading.

    A config that raises while being registered is logged, its traceback is
    printed, and processing moves on to the next config. DAGs already
    collected from that config's earlier pipelines stay in the result.

    :param configs_path: path searched for liminal yml files.
    :return: list of ``(pipeline['pipeline'], dag)`` tuples, one per
        pipeline that was registered.
    """
    logging.info(f'Registering DAGs from path: {configs_path}')
    config_util = ConfigUtil(configs_path)
    # TODO - change is_render_variable to False when runtime resolving is available
    configs = config_util.safe_load(is_render_variables=True)

    in_jenkins_namespace = os.getenv('POD_NAMESPACE') == "jenkins"
    if not in_jenkins_namespace:
        config_util.snapshot_final_liminal_configs()

    logging.info(f'found {len(configs)} liminal configs in path: {configs_path}')

    registered = []
    for liminal_config in configs:
        # Resolved outside the try block so the failure log below can always
        # reference it, even when the field is absent.
        name = None
        if 'name' in liminal_config:
            name = liminal_config['name']

        try:
            if not name:
                raise ValueError('liminal.yml missing field `name`')

            logging.info(f"Registering DAGs for {name}")

            owner = liminal_config.get('owner')

            # A truthy `always_run` switches every task of this config from
            # 'all_success' to 'all_done'.
            always_run = 'always_run' in liminal_config and liminal_config['always_run']
            trigger_rule = 'all_done' if always_run else 'all_success'

            executors = __initialize_executors(liminal_config)

            for pipeline in liminal_config['pipelines']:
                dag = __initialize_dag(__default_args(pipeline), pipeline, owner)

                # Tasks are chained: whatever apply_task_to_dag() returns is
                # handed to the next task as its parent.
                upstream = None
                for task in pipeline['tasks']:
                    task_class = get_task_class(task['type'])
                    upstream = task_class(
                        task_id=task['task'],
                        dag=dag,
                        parent=upstream,
                        trigger_rule=trigger_rule,
                        liminal_config=liminal_config,
                        pipeline_config=pipeline,
                        task_config=task,
                        executor=executors.get(task.get('executor'))
                    ).apply_task_to_dag()

                logging.info(f'registered DAG {dag.dag_id}: {dag.tasks}')
                registered.append((pipeline['pipeline'], dag))

        except Exception:
            # Best effort: one broken config must not block the others.
            logging.error(f'Failed to register DAGs for {name}')
            traceback.print_exc()

    return registered
# Example no. 2
# 0
    def test_liminal_config_snapshot(self, find_config_files_mock,
                                     get_airflow_dir_mock, path_exists_mock):
        """
        Check the snapshot written for a loaded 'sub' liminal config.

        One config is loaded with variable rendering enabled, then
        ``snapshot_final_liminal_configs`` is called. The test asserts that
        exactly one file is opened for writing, at
        ``../liminal_config_files/my_subliminal_test.yml`` joined onto the
        mocked airflow dir ``/tmp``, and that ``yaml.dump`` is called exactly
        once with the expected dict (executors, defaults, rendered params and
        start/end tasks included).

        NOTE(review): the three ``*_mock`` parameters are presumably injected
        by ``mock.patch`` decorators outside this view - confirm.
        """
        config_name = 'my_subliminal_test'
        pipeline_description = 'add defaults parameters for all pipelines'

        # Input config as returned by the mocked config-file lookup.
        find_config_files_mock.return_value = {
            config_name: {
                'name': config_name,
                'type': 'sub',
                'variables': {'var': 1, 'var-2': True},
                'pipelines': [
                    {'name': 'mypipe1', 'param': '{{var}}'},
                    {'name': 'mypipe2', 'param': '{{var-2   }}'},
                ],
            },
        }
        get_airflow_dir_mock.return_value = '/tmp'
        path_exists_mock.return_value = True

        def expected_pipeline(pipeline_name, rendered_param):
            # Expected shape of one pipeline in the dumped snapshot; a fresh
            # dict is built on every call so no objects are shared.
            return {
                'name': pipeline_name,
                'param': rendered_param,
                'description': pipeline_description,
                'tasks': [
                    {'task': 'start', 'type': 'job_start', 'executor': 'airflow_executor'},
                    {'task': 'end', 'type': 'job_end', 'executor': 'airflow_executor'},
                ],
            }

        expected_snapshot = {
            'name': config_name,
            'type': 'sub',
            'executors': [
                {'executor': 'default_k8s', 'type': 'kubernetes'},
                {'executor': 'airflow_executor', 'type': 'airflow'},
            ],
            'service_defaults': {'description': 'add defaults parameters for all services'},
            'task_defaults': {
                'description': 'add defaults parameters for all tasks separate by task type',
                'python': {'executor': 'default_k8s'},
                'spark': {'executor': 'default_k8s'},
                'job_end': {'executor': 'airflow_executor'},
                'job_start': {'executor': 'airflow_executor'},
            },
            'pipeline_defaults': {
                'description': pipeline_description,
                'before_tasks': [{'task': 'start', 'type': 'job_start'}],
                'after_tasks': [{'task': 'end', 'type': 'job_end'}],
            },
            'variables': {'var': 1, 'var-2': True},
            'pipelines': [
                expected_pipeline('mypipe1', '1'),
                expected_pipeline('mypipe2', 'True'),
            ],
            'images': [],
            'services': [],
        }

        with mock.patch('builtins.open', mock.mock_open()) as open_mock, \
                mock.patch('yaml.dump') as yaml_dump_mock:
            config_util = ConfigUtil('')
            config_util.safe_load(is_render_variables=True)
            config_util.snapshot_final_liminal_configs()

            snapshot_path = os.path.join(
                '/tmp', '../liminal_config_files/my_subliminal_test.yml')
            open_mock.assert_called_once_with(snapshot_path, 'w')
            yaml_dump_mock.assert_called_once_with(
                expected_snapshot, open_mock.return_value, default_flow_style=False)
# Example no. 3
# 0
    def test_liminal_config_snapshot(self, find_config_files_mock,
                                     get_airflow_dir_mock, path_exists_mock):
        """
        Check the snapshot written for a loaded 'sub' liminal config.

        One config is loaded with variable rendering enabled, then
        ``snapshot_final_liminal_configs`` is called. The test asserts a
        single ``open(..., 'w')`` on
        ``../liminal_config_files/my_subliminal_test.yml`` joined onto the
        mocked airflow dir ``/tmp``, and a single ``yaml.dump`` call carrying
        the expected dict (defaults, rendered params and start/end tasks).

        NOTE(review): the three ``*_mock`` parameters are presumably injected
        by ``mock.patch`` decorators outside this view - confirm.
        """
        # Input config as returned by the mocked config-file lookup.
        raw_config = {
            "name": "my_subliminal_test",
            "type": "sub",
            "variables": {"var": 1, "var-2": True},
            "pipelines": [
                {"name": "mypipe1", "param": "{{var}}"},
                {"name": "mypipe2", "param": "{{var-2   }}"},
            ],
        }
        find_config_files_mock.return_value = {"my_subliminal_test": raw_config}
        get_airflow_dir_mock.return_value = "/tmp"
        path_exists_mock.return_value = True

        # (pipeline name, param value expected after variable rendering)
        rendered_params = (('mypipe1', '1'), ('mypipe2', 'True'))

        expected_snapshot = {
            'name': 'my_subliminal_test',
            'type': 'sub',
            'service_defaults': {'description': 'add defaults parameters for all services'},
            'task_defaults': {
                'description': 'add defaults parameters for all tasks separate by task type',
            },
            'pipeline_defaults': {
                'description': 'add defaults parameters for all pipelines',
                'before_tasks': [{'task': 'start', 'type': 'job_start'}],
                'after_tasks': [{'task': 'end', 'type': 'job_end'}],
            },
            'variables': {'var': 1, 'var-2': True},
            'pipelines': [
                {
                    'name': pipeline_name,
                    'param': rendered_value,
                    'description': 'add defaults parameters for all pipelines',
                    'tasks': [
                        {'task': 'start', 'type': 'job_start'},
                        {'task': 'end', 'type': 'job_end'},
                    ],
                }
                for pipeline_name, rendered_value in rendered_params
            ],
            'services': [],
        }

        with mock.patch("builtins.open", mock.mock_open()) as patched_open, \
                mock.patch("yaml.dump") as patched_dump:
            loader = ConfigUtil("")
            loader.safe_load(is_render_variables=True)
            loader.snapshot_final_liminal_configs()

            patched_open.assert_called_once_with(
                os.path.join('/tmp', '../liminal_config_files/my_subliminal_test.yml'), 'w')
            patched_dump.assert_called_once_with(
                expected_snapshot, patched_open.return_value, default_flow_style=False)