Example #1
    def test_xcom_enable_pickle_type(self):
        json_obj = {"key": "value"}
        execution_date = timezone.utcnow()
        key = "xcom_test2"
        dag_id = "test_dag2"
        task_id = "test_task2"

        configuration.set("core", "enable_xcom_pickling", "True")

        XCom.set(key=key,
                 value=json_obj,
                 dag_id=dag_id,
                 task_id=task_id,
                 execution_date=execution_date)

        ret_value = XCom.get_one(key=key,
                                 dag_id=dag_id,
                                 task_id=task_id,
                                 execution_date=execution_date)

        self.assertEqual(ret_value, json_obj)

        session = settings.Session()
        ret_value = session.query(XCom).filter(XCom.key == key, XCom.dag_id == dag_id,
                                               XCom.task_id == task_id,
                                               XCom.execution_date == execution_date
                                               ).first().value

        self.assertEqual(ret_value, json_obj)
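
Note: tests like this mutate the process-wide Airflow config, so the change can leak into later tests. A minimal sketch (not part of the original example, assuming `conf` is `airflow.configuration.conf`) of capturing and restoring the previous value:

from airflow import configuration
from airflow.configuration import conf

old_value = conf.get("core", "enable_xcom_pickling")
configuration.set("core", "enable_xcom_pickling", "True")
try:
    pass  # run the XCom.set() / XCom.get_one() assertions from the test above
finally:
    configuration.set("core", "enable_xcom_pickling", old_value)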
Example #2
    def test_xcom_get_many(self):
        json_obj = {"key": "value"}
        execution_date = timezone.utcnow()
        key = "xcom_test4"
        dag_id1 = "test_dag4"
        task_id1 = "test_task4"
        dag_id2 = "test_dag5"
        task_id2 = "test_task5"

        configuration.set("core", "xcom_enable_pickling", "True")

        XCom.set(key=key,
                 value=json_obj,
                 dag_id=dag_id1,
                 task_id=task_id1,
                 execution_date=execution_date)

        XCom.set(key=key,
                 value=json_obj,
                 dag_id=dag_id2,
                 task_id=task_id2,
                 execution_date=execution_date)

        results = XCom.get_many(key=key,
                                execution_date=execution_date)

        for result in results:
            self.assertEqual(result.value, json_obj)
Example #3
    def test_email_alert_with_config(self, mock_send_email):
        dag = models.DAG(dag_id='test_failure_email')
        task = BashOperator(
            task_id='test_email_alert_with_config',
            dag=dag,
            bash_command='exit 1',
            start_date=DEFAULT_DATE,
            email='to')

        ti = TI(
            task=task, execution_date=datetime.datetime.now())

        configuration.set('email', 'SUBJECT_TEMPLATE', '/subject/path')
        configuration.set('email', 'HTML_CONTENT_TEMPLATE', '/html_content/path')

        opener = mock_open(read_data='template: {{ti.task_id}}')
        with patch('airflow.models.taskinstance.open', opener, create=True):
            try:
                ti.run()
            except AirflowException:
                pass

        (email, title, body), _ = mock_send_email.call_args
        self.assertEqual(email, 'to')
        self.assertEqual('template: test_email_alert_with_config', title)
        self.assertEqual('template: test_email_alert_with_config', body)
Example #4
def validate_logging_config(logging_config):
    # Now let's validate the other logging-related settings
    task_log_reader = conf.get('core', 'task_log_reader')

    logger = logging.getLogger('airflow.task')

    def _get_handler(name):
        return next((h for h in logger.handlers if h.name == name), None)

    if _get_handler(task_log_reader) is None:
        # Check for pre 1.10 setting that might be in deployed airflow.cfg files
        if task_log_reader == "file.task" and _get_handler("task"):
            warnings.warn(
                "task_log_reader setting in [core] has a deprecated value of "
                "{!r}, but no handler with this name was found. Please update "
                "your config to use {!r}. Running config has been adjusted to "
                "match".format(
                    task_log_reader,
                    "task",
                ),
                DeprecationWarning,
            )
            conf.set('core', 'task_log_reader', 'task')
        else:
            raise AirflowConfigException(
                "Configured task_log_reader {!r} was not a handler of the 'airflow.task' "
                "logger.".format(task_log_reader)
            )
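
The check above passes as long as the configured `task_log_reader` names a handler attached to the 'airflow.task' logger. A minimal illustration (not Airflow's actual DEFAULT_LOGGING_CONFIG, names are illustrative) of a dict config that satisfies it, relying on dictConfig assigning each handler's .name from its key:

import logging
import logging.config

LOGGING_CONFIG = {
    'version': 1,
    'disable_existing_loggers': False,
    'handlers': {
        # key 'task' becomes handler.name, which _get_handler() matches on
        'task': {'class': 'logging.StreamHandler'},
    },
    'loggers': {
        'airflow.task': {'handlers': ['task'], 'level': 'INFO'},
    },
}

logging.config.dictConfig(LOGGING_CONFIG)
assert logging.getLogger('airflow.task').handlers[0].name == 'task'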
Example #5
    def setUp(self):
        super(TestLogView, self).setUp()

        # Create a custom logging configuration
        configuration.load_test_config()
        logging_config = copy.deepcopy(DEFAULT_LOGGING_CONFIG)
        current_dir = os.path.dirname(os.path.abspath(__file__))
        logging_config['handlers']['task']['base_log_folder'] = os.path.normpath(
            os.path.join(current_dir, 'test_logs'))
        logging_config['handlers']['task']['filename_template'] = \
            '{{ ti.dag_id }}/{{ ti.task_id }}/{{ ts | replace(":", ".") }}/{{ try_number }}.log'

        # Write the custom logging configuration to a file
        self.settings_folder = tempfile.mkdtemp()
        settings_file = os.path.join(self.settings_folder, "airflow_local_settings.py")
        new_logging_file = "LOGGING_CONFIG = {}".format(logging_config)
        with open(settings_file, 'w') as handle:
            handle.writelines(new_logging_file)
        sys.path.append(self.settings_folder)
        conf.set('core', 'logging_config_class', 'airflow_local_settings.LOGGING_CONFIG')

        app = application.create_app(testing=True)
        self.app = app.test_client()
        self.session = Session()
        from airflow.www.views import dagbag
        dag = DAG(self.DAG_ID, start_date=self.DEFAULT_DATE)
        task = DummyOperator(task_id=self.TASK_ID, dag=dag)
        dagbag.bag_dag(dag, parent_dag=dag, root_dag=dag)
        ti = TaskInstance(task=task, execution_date=self.DEFAULT_DATE)
        ti.try_number = 1
        self.session.merge(ti)
        self.session.commit()
Example #6
    def setUp(self):
        application.app = None
        super(TestMountPoint, self).setUp()
        conf.load_test_config()
        conf.set("webserver", "base_url", "http://localhost:8080/test")
        config = dict()
        config['WTF_CSRF_METHODS'] = []
        app = application.cached_app(config=config, testing=True)
        self.client = Client(app)
Example #7
    def tearDown(self):
        logging.config.dictConfig(DEFAULT_LOGGING_CONFIG)
        self.clear_table(TaskInstance)

        shutil.rmtree(self.settings_folder)
        conf.set('core', 'logging_config_class', '')

        self.logout()
        super(TestLogView, self).tearDown()
Example #8
    def test_1_9_config(self):
        from airflow.logging_config import configure_logging
        conf.set('core', 'task_log_reader', 'file.task')
        try:
            with self.assertWarnsRegex(DeprecationWarning, r'file.task'):
                configure_logging()
                self.assertEqual(conf.get('core', 'task_log_reader'), 'task')
        finally:
            conf.remove_option('core', 'task_log_reader', remove_default=False)
Example #9
    def __enter__(self):
        with open(self.settings_file, 'w') as handle:
            handle.writelines(self.content)
        sys.path.append(self.settings_root)
        conf.set(
            'core',
            'logging_config_class',
            self.module
        )
        return self.settings_file
Example #10
    def test_xcom_disable_pickle_type_fail_on_non_json(self):
        class PickleRce(object):
            def __reduce__(self):
                return os.system, ("ls -alt",)

        configuration.set("core", "xcom_enable_pickling", "False")

        self.assertRaises(TypeError, XCom.set,
                          key="xcom_test3",
                          value=PickleRce(),
                          dag_id="test_dag3",
                          task_id="test_task3",
                          execution_date=timezone.utcnow())
Example #11
    def tearDown(self):
        logging.config.dictConfig(DEFAULT_LOGGING_CONFIG)
        self.session.query(TaskInstance).filter(
            TaskInstance.dag_id == self.DAG_ID,
            TaskInstance.task_id == self.TASK_ID,
            TaskInstance.execution_date == self.DEFAULT_DATE).delete()
        self.session.commit()
        self.session.close()

        sys.path.remove(self.settings_folder)
        shutil.rmtree(self.settings_folder)
        conf.set('core', 'logging_config_class', '')

        super(TestLogView, self).tearDown()
Example #12
    def test_command_execution_with_env(self):
        configuration.set("core", "enable_xcom_pickling", "True")
        task = SSHOperator(
            task_id="test",
            ssh_hook=self.hook,
            command="echo -n airflow",
            do_xcom_push=True,
            dag=self.dag,
        )

        self.assertIsNotNone(task)

        ti = TaskInstance(
            task=task, execution_date=datetime.now())
        ti.run()
        self.assertIsNotNone(ti.duration)
        self.assertEqual(ti.xcom_pull(task_ids='test', key='return_value'), b'airflow')
Example #13
    def test_no_output_command(self):
        configuration.set("core", "enable_xcom_pickling", "True")
        task = SSHOperator(
            task_id="test",
            ssh_hook=self.hook,
            command="sleep 1",
            do_xcom_push=True,
            dag=self.dag,
        )

        self.assertIsNotNone(task)

        ti = TaskInstance(
            task=task, execution_date=timezone.utcnow())
        ti.run()
        self.assertIsNotNone(ti.duration)
        self.assertEqual(ti.xcom_pull(task_ids='test', key='return_value'), b'')
Example #14
    def test_json_command_execution(self):
        configuration.set("core", "enable_xcom_pickling", "False")
        task = SSHOperator(
                task_id="test",
                ssh_hook=self.hook,
                command="echo -n airflow",
                do_xcom_push=True,
                dag=self.dag,
        )

        self.assertIsNotNone(task)

        ti = TaskInstance(
                task=task, execution_date=timezone.utcnow())
        ti.run()
        self.assertIsNotNone(ti.duration)
        self.assertEqual(ti.xcom_pull(task_ids='test', key='return_value'),
                         b64encode(b'airflow').decode('utf-8'))
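
Because pickling is disabled here, the pushed XCom is the base64-encoded string rather than raw bytes (compare Example #12). A small follow-up sketch (not in the original test, `ti` being the TaskInstance above) recovering the raw command output:

from base64 import b64decode

assert b64decode(ti.xcom_pull(task_ids='test', key='return_value')) == b'airflow'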
Example #15
    def test_json_file_transfer_get(self):
        configuration.set("core", "enable_xcom_pickling", "False")
        test_remote_file_content = \
            "This is remote file content \n which is also multiline " \
            "another line here \n this is last line. EOF"

        # create a test file remotely
        create_file_task = SSHOperator(
                task_id="test_create_file",
                ssh_hook=self.hook,
                command="echo '{0}' > {1}".format(test_remote_file_content,
                                                  self.test_remote_filepath),
                do_xcom_push=True,
                dag=self.dag
        )
        self.assertIsNotNone(create_file_task)
        ti1 = TaskInstance(task=create_file_task, execution_date=datetime.now())
        ti1.run()

        # get remote file to local
        get_test_task = SFTPOperator(
                task_id="test_sftp",
                ssh_hook=self.hook,
                local_filepath=self.test_local_filepath,
                remote_filepath=self.test_remote_filepath,
                operation=SFTPOperation.GET,
                dag=self.dag
        )
        self.assertIsNotNone(get_test_task)
        ti2 = TaskInstance(task=get_test_task, execution_date=datetime.now())
        ti2.run()

        # test the received content
        content_received = None
        with open(self.test_local_filepath, 'r') as f:
            content_received = f.read()
        self.assertEqual(content_received.strip(), test_remote_file_content)
Example #16
    def test_pickle_file_transfer_put(self):
        configuration.set("core", "enable_xcom_pickling", "True")
        test_local_file_content = \
            b"This is local file content \n which is multiline " \
            b"continuing....with other character\nanother line here \n this is last line"
        # create a test file locally
        with open(self.test_local_filepath, 'wb') as f:
            f.write(test_local_file_content)

        # put test file to remote
        put_test_task = SFTPOperator(
                task_id="test_sftp",
                ssh_hook=self.hook,
                local_filepath=self.test_local_filepath,
                remote_filepath=self.test_remote_filepath,
                operation=SFTPOperation.PUT,
                dag=self.dag
        )
        self.assertIsNotNone(put_test_task)
        ti2 = TaskInstance(task=put_test_task, execution_date=datetime.now())
        ti2.run()

        # check the remote file content
        check_file_task = SSHOperator(
                task_id="test_check_file",
                ssh_hook=self.hook,
                command="cat {0}".format(self.test_remote_filepath),
                do_xcom_push=True,
                dag=self.dag
        )
        self.assertIsNotNone(check_file_task)
        ti3 = TaskInstance(task=check_file_task, execution_date=datetime.now())
        ti3.run()
        self.assertEqual(
                ti3.xcom_pull(task_ids='test_check_file', key='return_value').strip(),
                test_local_file_content)
Example #17
    def test_loading_remote_logging_with_wasb_handler(self):
        """Test if logging can be configured successfully for Azure Blob Storage"""
        import logging
        from airflow.config_templates import airflow_local_settings
        from airflow.logging_config import configure_logging
        from airflow.utils.log.wasb_task_handler import WasbTaskHandler

        conf.set('core', 'remote_logging', 'True')
        conf.set('core', 'remote_log_conn_id', 'some_wasb')
        conf.set('core', 'remote_base_log_folder', 'wasb://some-folder')

        six.moves.reload_module(airflow_local_settings)
        configure_logging()

        logger = logging.getLogger('airflow.task')
        self.assertIsInstance(logger.handlers[0], WasbTaskHandler)
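
By analogy (not part of the original test), the same reload-and-configure pattern should select the S3 handler when the base log folder uses an s3:// scheme; the module path and connection id below are the ones used in Airflow 1.10 and should be treated as assumptions for other versions:

import logging

import six

from airflow.config_templates import airflow_local_settings
from airflow.configuration import conf
from airflow.logging_config import configure_logging
from airflow.utils.log.s3_task_handler import S3TaskHandler

conf.set('core', 'remote_logging', 'True')
conf.set('core', 'remote_log_conn_id', 'some_s3')                    # assumed connection id
conf.set('core', 'remote_base_log_folder', 's3://some-bucket/logs')  # assumed bucket path

six.moves.reload_module(airflow_local_settings)
configure_logging()

assert isinstance(logging.getLogger('airflow.task').handlers[0], S3TaskHandler)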
Example #18
    def test_tls(self):
        with dask_testing_cluster(
                worker_kwargs={'security': tls_security()},
                scheduler_kwargs={'security': tls_security()}) as (s, workers):

            # These use test certs that ship with dask/distributed and should not be
            #  used in production
            configuration.set('dask', 'tls_ca', get_cert('tls-ca-cert.pem'))
            configuration.set('dask', 'tls_cert', get_cert('tls-key-cert.pem'))
            configuration.set('dask', 'tls_key', get_cert('tls-key.pem'))
            try:
                executor = DaskExecutor(cluster_address=s['address'])

                self.assert_tasks_on_executor(executor)

                executor.end()
                # close the executor, the cluster context manager expects all listeners
                # and tasks to have completed.
                executor.client.close()
            finally:
                configuration.set('dask', 'tls_ca', '')
                configuration.set('dask', 'tls_key', '')
                configuration.set('dask', 'tls_cert', '')
Example #19
    def __exit__(self, *exc_info):
        # shutil.rmtree(self.settings_root)
        # Reset config
        conf.set('core', 'logging_config_class', '')
        sys.path.remove(self.settings_root)
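
This __exit__ and the matching __enter__ in Example #9 appear to come from a small settings-file test helper. A self-contained sketch, with an assumed constructor (the tempfile layout and attribute names are illustrative, not from the original source):

import os
import sys
import tempfile

from airflow.configuration import conf


class settings_context(object):
    # Writes `content` into a temporary airflow_local_settings-style module,
    # points [core] logging_config_class at it, and undoes both on exit.
    def __init__(self, content, module='airflow_local_settings.LOGGING_CONFIG'):
        self.content = content
        self.module = module
        self.settings_root = tempfile.mkdtemp()
        self.settings_file = os.path.join(self.settings_root, 'airflow_local_settings.py')

    def __enter__(self):
        with open(self.settings_file, 'w') as handle:
            handle.writelines(self.content)
        sys.path.append(self.settings_root)
        conf.set('core', 'logging_config_class', self.module)
        return self.settings_file

    def __exit__(self, *exc_info):
        conf.set('core', 'logging_config_class', '')
        sys.path.remove(self.settings_root)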
Example #20
def generate_biowardrobe_workflow():

    _template = u"""#!/usr/bin/env python3
from airflow import DAG
from biowardrobe_cwl_workflows import workflow
dag = workflow("{}")
"""
    all_workflows = available()
    for workflow in all_workflows:
        if not workflow:
            continue

        _filename = os.path.abspath(
            os.path.join(
                DAGS_FOLDER,
                os.path.basename(os.path.splitext(workflow)[0]) + '.py'))
        print(_filename)
        with open(_filename, 'w') as generated_workflow_stream:
            generated_workflow_stream.write(_template.format(workflow))

    try:
        api_client.get_pool(name='basic_analysis')
    except Exception:
        api_client.create_pool(name='basic_analysis',
                               slots=1,
                               description="pool to run basic analysis")

    if not conf.has_option('cwl', 'tmp_folder'):
        if not os.path.exists(conf.AIRFLOW_CONFIG + '.orig'):
            copyfile(conf.AIRFLOW_CONFIG, conf.AIRFLOW_CONFIG + '.orig')
        with open(conf.AIRFLOW_CONFIG, 'w') as fp:
            # for s in ['mesos', 'kerberos', 'celery', 'smtp', 'email', 'dask', 'ldap']:
            #     conf.conf.remove_section(s)

            conf.conf.add_section('cwl')
            conf.set('cwl', 'tmp_folder', os.path.join(AIRFLOW_HOME, 'tmp'))

            conf.set('core', 'logging_level', 'WARNING')
            conf.set('core', 'load_examples', 'False')
            conf.set('webserver', 'dag_default_view', 'graph')
            conf.set('webserver', 'dag_orientation', 'TB')
            conf.set('webserver', 'web_server_worker_timeout', '120')
            conf.set('scheduler', 'job_heartbeat_sec', '20')
            conf.set('scheduler', 'scheduler_heartbeat_sec', '20')
            conf.set('scheduler', 'min_file_process_interval', '30')
            conf.conf.write(fp)

    # startup_scripts = ['com.datirium.airflow-scheduler.plist', 'com.datirium.airflow-webserver.plist']
    # if platform == "darwin":
    #     _sys_dir = os.path.expanduser('~/Library/LaunchAgents')
    #     for scripts in startup_scripts:
    #         with open(os.path.join(system_folder, 'macosx', scripts), 'r') as s:
    #             data = s.read()
    #             # OS X
    #         dst = os.path.join(_sys_dir, scripts)
    #
    #         if os.path.exists(dst):
    #             with open(dst + '.new', 'w') as w:
    #                 w.write(data.format(AIRFLOW_HOME=AIRFLOW_HOME))
    #         else:
    #             with open(dst, 'w') as w:
    #                 w.write(data.format(AIRFLOW_HOME=AIRFLOW_HOME))

    # if platform == "linux" or platform == "linux2":
    # linux
    # elif platform == "win32":
    # Windows...

    # TODO: tmp, dags do not exist ???


# generate_biowardrobe_workflow()
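
For reference, with a hypothetical workflow path such as /path/to/chipseq-se.cwl returned by available() (the name is illustrative, not from the original source), the loop above would write DAGS_FOLDER/chipseq-se.py containing the rendered template:

#!/usr/bin/env python3
from airflow import DAG
from biowardrobe_cwl_workflows import workflow
dag = workflow("/path/to/chipseq-se.cwl")  # hypothetical workflow path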