Example #1
    def setUp(self):
        """Build a CloudwatchTaskHandler plus a running task instance to test with."""
        self.remote_log_group = 'log_group_name'
        self.region_name = 'us-west-2'
        self.local_log_location = 'local/log/location'
        self.filename_template = '{dag_id}/{task_id}/{execution_date}/{try_number}.log'
        arn = f"arn:aws:logs:{self.region_name}:11111111:log-group:{self.remote_log_group}"
        self.cloudwatch_task_handler = CloudwatchTaskHandler(
            self.local_log_location,
            arn,
            self.filename_template,
        )
        # Touch the cached property once so the hook exists before any test runs.
        self.cloudwatch_task_handler.hook

        execution_date = datetime(2020, 1, 1)
        dag_id = 'dag_for_testing_file_task_handler'
        task_id = 'task_for_testing_file_log_handler'
        self.dag = DAG(dag_id=dag_id, start_date=execution_date)
        self.ti = TaskInstance(
            task=DummyOperator(task_id=task_id, dag=self.dag),
            execution_date=execution_date,
        )
        self.ti.try_number = 1
        self.ti.state = State.RUNNING

        # Expected stream name mirrors filename_template; ':' from the ISO
        # timestamp is replaced since it is not allowed in stream names.
        self.remote_log_stream = (
            f'{dag_id}/{task_id}/{execution_date.isoformat()}/{self.ti.try_number}.log'
        ).replace(':', '_')

        # Fresh moto backend so log events never leak between tests.
        moto.core.moto_api_backend.reset()
        self.conn = boto3.client('logs', region_name=self.region_name)
    def test_hook_raises(self):
        """A failure while creating the AwsLogsHook is logged, not raised."""
        handler = CloudwatchTaskHandler(
            self.local_log_location,
            f"arn:aws:logs:{self.region_name}:11111111:log-group:{self.remote_log_group}",
            self.filename_template,
        )

        with mock.patch.object(handler.log, 'error') as error_log:
            with mock.patch("airflow.providers.amazon.aws.hooks.logs.AwsLogsHook") as hook_cls:
                hook_cls.side_effect = Exception('Failed to connect')
                # First access of the cached property triggers hook creation.
                handler.hook

            error_log.assert_called_once_with(
                'Could not create an AwsLogsHook with connection id "%s". Please make '
                'sure that airflow[aws] is installed and the Cloudwatch logs connection exists.',
                'aws_default',
            )
Example #3
class TestCloudwatchTaskHandler(unittest.TestCase):
    """Tests for CloudwatchTaskHandler against a moto-mocked Cloudwatch backend."""

    @conf_vars({('logging', 'remote_log_conn_id'): 'aws_default'})
    def setUp(self):
        """Create a handler, a running TaskInstance, and a clean moto backend."""
        self.remote_log_group = 'log_group_name'
        self.region_name = 'us-west-2'
        self.local_log_location = 'local/log/location'
        self.filename_template = '{dag_id}/{task_id}/{execution_date}/{try_number}.log'
        self.cloudwatch_task_handler = CloudwatchTaskHandler(
            self.local_log_location,
            f"arn:aws:logs:{self.region_name}:11111111:log-group:{self.remote_log_group}",
            self.filename_template,
        )
        # Touch the cached property once so the hook is created during setup.
        self.cloudwatch_task_handler.hook

        date = datetime(2020, 1, 1)
        dag_id = 'dag_for_testing_file_task_handler'
        task_id = 'task_for_testing_file_log_handler'
        self.dag = DAG(dag_id=dag_id, start_date=date)
        task = DummyOperator(task_id=task_id, dag=self.dag)
        self.ti = TaskInstance(task=task, execution_date=date)
        self.ti.try_number = 1
        self.ti.state = State.RUNNING

        # Expected stream name follows filename_template; ':' from the ISO
        # timestamp is replaced because it is not valid in stream names.
        self.remote_log_stream = '{}/{}/{}/{}.log'.format(
            dag_id, task_id, date.isoformat(), self.ti.try_number
        ).replace(':', '_')

        # Reset moto state so log events do not leak between tests.
        moto.core.moto_api_backend.reset()
        self.conn = boto3.client('logs', region_name=self.region_name)

    def tearDown(self):
        # Drop the underlying watchtower handler so per-test close() state resets.
        self.cloudwatch_task_handler.handler = None

    def test_hook(self):
        """The handler's hook property yields an AwsLogsHook."""
        assert isinstance(self.cloudwatch_task_handler.hook, AwsLogsHook)

    @conf_vars({('logging', 'remote_log_conn_id'): 'aws_default'})
    def test_hook_raises(self):
        """A failure while creating the hook is logged (with the exception), not raised."""
        handler = CloudwatchTaskHandler(
            self.local_log_location,
            f"arn:aws:logs:{self.region_name}:11111111:log-group:{self.remote_log_group}",
            self.filename_template,
        )

        with mock.patch.object(handler.log, 'error') as mock_error:
            with mock.patch("airflow.providers.amazon.aws.hooks.logs.AwsLogsHook") as mock_hook:
                mock_hook.side_effect = Exception('Failed to connect')
                # Initialize the hook
                handler.hook

            mock_error.assert_called_once_with(
                'Could not create an AwsLogsHook with connection id "%s". Please make '
                'sure that apache-airflow[aws] is installed and the Cloudwatch '
                'logs connection exists. Exception: "%s"',
                'aws_default',
                ANY,
            )

    def test_handler(self):
        """set_context installs a watchtower CloudWatchLogHandler."""
        self.cloudwatch_task_handler.set_context(self.ti)
        assert isinstance(self.cloudwatch_task_handler.handler, CloudWatchLogHandler)

    def test_write(self):
        """Each handled record is forwarded to the watchtower handler's emit()."""
        handler = self.cloudwatch_task_handler
        handler.set_context(self.ti)
        messages = [str(i) for i in range(10)]

        with mock.patch("watchtower.CloudWatchLogHandler.emit") as mock_emit:
            for message in messages:
                handler.handle(message)
            mock_emit.assert_has_calls([call(message) for message in messages])

    def test_event_to_str(self):
        """Events render as '[timestamp] message' with millisecond precision."""
        handler = self.cloudwatch_task_handler
        events = [
            {'timestamp': 1617400267123, 'message': 'First'},
            {'timestamp': 1617400367456, 'message': 'Second'},
            {'timestamp': 1617400467789, 'message': 'Third'},
        ]
        assert [handler._event_to_str(event) for event in events] == (
            [
                '[2021-04-02 21:51:07,123] First',
                '[2021-04-02 21:52:47,456] Second',
                '[2021-04-02 21:54:27,789] Third',
            ]
        )

    def test_read(self):
        """read() returns the stored events joined into a single annotated log."""
        # Confirmed via AWS Support call:
        # CloudWatch events must be ordered chronologically otherwise
        # boto3 put_log_event API throws InvalidParameterException
        # (moto does not throw this exception)
        generate_log_events(
            self.conn,
            self.remote_log_group,
            self.remote_log_stream,
            [
                {'timestamp': 1617400267123, 'message': 'First'},
                {'timestamp': 1617400367456, 'message': 'Second'},
                {'timestamp': 1617400467789, 'message': 'Third'},
            ],
        )

        msg_template = '*** Reading remote log from Cloudwatch log_group: {} log_stream: {}.\n{}\n'
        events = '\n'.join(
            [
                '[2021-04-02 21:51:07,123] First',
                '[2021-04-02 21:52:47,456] Second',
                '[2021-04-02 21:54:27,789] Third',
            ]
        )
        assert self.cloudwatch_task_handler.read(self.ti) == (
            [[('', msg_template.format(self.remote_log_group, self.remote_log_stream, events))]],
            [{'end_of_log': True}],
        )

    def test_read_wrong_log_stream(self):
        """Events written to a different stream yield a could-not-read message."""
        generate_log_events(
            self.conn,
            self.remote_log_group,
            'alternate_log_stream',
            [
                {'timestamp': 10000, 'message': 'First'},
                {'timestamp': 20000, 'message': 'Second'},
                {'timestamp': 30000, 'message': 'Third'},
            ],
        )

        msg_template = '*** Reading remote log from Cloudwatch log_group: {} log_stream: {}.\n{}\n'
        error_msg = 'Could not read remote logs from log_group: {} log_stream: {}.'.format(
            self.remote_log_group, self.remote_log_stream
        )
        assert self.cloudwatch_task_handler.read(self.ti) == (
            [[('', msg_template.format(self.remote_log_group, self.remote_log_stream, error_msg))]],
            [{'end_of_log': True}],
        )

    def test_read_wrong_log_group(self):
        """Events written to a different log group yield a could-not-read message."""
        generate_log_events(
            self.conn,
            'alternate_log_group',
            self.remote_log_stream,
            [
                {'timestamp': 10000, 'message': 'First'},
                {'timestamp': 20000, 'message': 'Second'},
                {'timestamp': 30000, 'message': 'Third'},
            ],
        )

        msg_template = '*** Reading remote log from Cloudwatch log_group: {} log_stream: {}.\n{}\n'
        error_msg = 'Could not read remote logs from log_group: {} log_stream: {}.'.format(
            self.remote_log_group, self.remote_log_stream
        )
        assert self.cloudwatch_task_handler.read(self.ti) == (
            [[('', msg_template.format(self.remote_log_group, self.remote_log_stream, error_msg))]],
            [{'end_of_log': True}],
        )

    def test_close_prevents_duplicate_calls(self):
        """Repeated close() calls only close the watchtower handler once."""
        with mock.patch("watchtower.CloudWatchLogHandler.close") as mock_log_handler_close:
            with mock.patch("airflow.utils.log.file_task_handler.FileTaskHandler.set_context"):
                self.cloudwatch_task_handler.set_context(self.ti)
                for _ in range(5):
                    self.cloudwatch_task_handler.close()

                mock_log_handler_close.assert_called_once()
class TestCloudwatchTaskHandler(unittest.TestCase):
    """Older variant of the CloudwatchTaskHandler tests (unittest-style asserts)."""

    @conf_vars({('logging', 'remote_log_conn_id'): 'aws_default'})
    def setUp(self):
        """Create a handler, a running TaskInstance, and a clean moto backend."""
        self.remote_log_group = 'log_group_name'
        self.region_name = 'us-west-2'
        self.local_log_location = 'local/log/location'
        self.filename_template = '{dag_id}/{task_id}/{execution_date}/{try_number}.log'
        self.cloudwatch_task_handler = CloudwatchTaskHandler(
            self.local_log_location,
            f"arn:aws:logs:{self.region_name}:11111111:log-group:{self.remote_log_group}",
            self.filename_template,
        )
        # Touch the cached property once so the hook is created during setup.
        self.cloudwatch_task_handler.hook

        date = datetime(2020, 1, 1)
        dag_id = 'dag_for_testing_file_task_handler'
        task_id = 'task_for_testing_file_log_handler'
        self.dag = DAG(dag_id=dag_id, start_date=date)
        task = DummyOperator(task_id=task_id, dag=self.dag)
        self.ti = TaskInstance(task=task, execution_date=date)
        self.ti.try_number = 1
        self.ti.state = State.RUNNING

        # Expected stream name follows filename_template; ':' from the ISO
        # timestamp is replaced because it is not valid in stream names.
        self.remote_log_stream = '{}/{}/{}/{}.log'.format(
            dag_id, task_id, date.isoformat(),
            self.ti.try_number).replace(':', '_')

        # Reset moto state so log events do not leak between tests.
        moto.core.moto_api_backend.reset()
        self.conn = boto3.client('logs', region_name=self.region_name)

    def tearDown(self):
        # Drop the underlying watchtower handler so per-test close() state resets.
        self.cloudwatch_task_handler.handler = None

    def test_hook(self):
        """The handler's hook property yields an AwsLogsHook."""
        self.assertIsInstance(self.cloudwatch_task_handler.hook, AwsLogsHook)

    @conf_vars({('logging', 'remote_log_conn_id'): 'aws_default'})
    def test_hook_raises(self):
        """A failure while creating the hook is logged, not raised."""
        handler = CloudwatchTaskHandler(
            self.local_log_location,
            f"arn:aws:logs:{self.region_name}:11111111:log-group:{self.remote_log_group}",
            self.filename_template,
        )

        with mock.patch.object(handler.log, 'error') as mock_error:
            with mock.patch(
                    "airflow.providers.amazon.aws.hooks.logs.AwsLogsHook"
            ) as mock_hook:
                mock_hook.side_effect = Exception('Failed to connect')
                # Initialize the hook
                handler.hook

            mock_error.assert_called_once_with(
                'Could not create an AwsLogsHook with connection id "%s". Please make '
                'sure that airflow[aws] is installed and the Cloudwatch logs connection exists.',
                'aws_default',
            )

    def test_handler(self):
        """set_context installs a watchtower CloudWatchLogHandler."""
        self.cloudwatch_task_handler.set_context(self.ti)
        self.assertIsInstance(self.cloudwatch_task_handler.handler,
                              CloudWatchLogHandler)

    def test_write(self):
        """Each handled record is forwarded to the watchtower handler's emit()."""
        handler = self.cloudwatch_task_handler
        handler.set_context(self.ti)
        messages = [str(i) for i in range(10)]

        with mock.patch("watchtower.CloudWatchLogHandler.emit") as mock_emit:
            for message in messages:
                handler.handle(message)
            mock_emit.assert_has_calls([call(message) for message in messages])

    def test_read(self):
        """read() returns events ordered by timestamp, not by insertion order."""
        # Events are deliberately inserted out of chronological order; the
        # expected output below ('First\nSecond\nThird') shows read() sorts
        # them by timestamp.
        generate_log_events(
            self.conn,
            self.remote_log_group,
            self.remote_log_stream,
            [
                {
                    'timestamp': 20000,
                    'message': 'Second'
                },
                {
                    'timestamp': 10000,
                    'message': 'First'
                },
                {
                    'timestamp': 30000,
                    'message': 'Third'
                },
            ],
        )

        expected = (
            '*** Reading remote log from Cloudwatch log_group: {} log_stream: {}.\nFirst\nSecond\nThird\n'
        )
        self.assertEqual(
            self.cloudwatch_task_handler.read(self.ti),
            (
                [[('',
                   expected.format(self.remote_log_group,
                                   self.remote_log_stream))]],
                [{
                    'end_of_log': True
                }],
            ),
        )

    def test_read_wrong_log_stream(self):
        """Events written to a different stream yield a could-not-read message."""
        generate_log_events(
            self.conn,
            self.remote_log_group,
            'alternate_log_stream',
            [
                {
                    'timestamp': 20000,
                    'message': 'Second'
                },
                {
                    'timestamp': 10000,
                    'message': 'First'
                },
                {
                    'timestamp': 30000,
                    'message': 'Third'
                },
            ],
        )

        msg_template = '*** Reading remote log from Cloudwatch log_group: {} log_stream: {}.\n{}\n'
        error_msg = 'Could not read remote logs from log_group: {} log_stream: {}.'.format(
            self.remote_log_group, self.remote_log_stream)
        self.assertEqual(
            self.cloudwatch_task_handler.read(self.ti),
            (
                [[('',
                   msg_template.format(self.remote_log_group,
                                       self.remote_log_stream, error_msg))]],
                [{
                    'end_of_log': True
                }],
            ),
        )

    def test_read_wrong_log_group(self):
        """Events written to a different log group yield a could-not-read message."""
        generate_log_events(
            self.conn,
            'alternate_log_group',
            self.remote_log_stream,
            [
                {
                    'timestamp': 20000,
                    'message': 'Second'
                },
                {
                    'timestamp': 10000,
                    'message': 'First'
                },
                {
                    'timestamp': 30000,
                    'message': 'Third'
                },
            ],
        )

        msg_template = '*** Reading remote log from Cloudwatch log_group: {} log_stream: {}.\n{}\n'
        error_msg = 'Could not read remote logs from log_group: {} log_stream: {}.'.format(
            self.remote_log_group, self.remote_log_stream)
        self.assertEqual(
            self.cloudwatch_task_handler.read(self.ti),
            (
                [[('',
                   msg_template.format(self.remote_log_group,
                                       self.remote_log_stream, error_msg))]],
                [{
                    'end_of_log': True
                }],
            ),
        )

    def test_close_prevents_duplicate_calls(self):
        """Repeated close() calls only close the watchtower handler once."""
        with mock.patch("watchtower.CloudWatchLogHandler.close"
                        ) as mock_log_handler_close:
            with mock.patch(
                    "airflow.utils.log.file_task_handler.FileTaskHandler.set_context"
            ):
                self.cloudwatch_task_handler.set_context(self.ti)
                for _ in range(5):
                    self.cloudwatch_task_handler.close()

                mock_log_handler_close.assert_called_once()