def test_repair_hive_table_unknown_bucket(self, mock_logging):
        """Athena - Repair Hive Table - Unknown Bucket

        Repairing a bucket that is absent from the `repair_hive_table`
        config map is expected to produce a warning log.
        """
        # Mock Athena client configured with a SUCCEEDED result state
        athena_stub = MockAthenaClient(result_state='SUCCEEDED')
        self.client.athena_client = athena_stub

        # This bucket is not in our `repair_hive_table` config map
        unknown_buckets = {'my-test.result.bucket'}
        self.client.repair_hive_table(unknown_buckets)

        assert_true(mock_logging.warning.called)
    def test_repair_hive_table_failed_refresh(self, mock_logging):
        """Athena - Repair Hive Table - Failed Refresh

        With the mocked Athena client configured with a FAILED result state,
        the repair is expected to produce an error log.
        """
        athena_stub = MockAthenaClient(result_state='FAILED')
        self.client.athena_client = athena_stub

        # NOTE(review): presumably a configured bucket (verify against the test
        # config); the error should stem from the FAILED result state
        buckets = {'unit-testing.streamalerts'}
        self.client.repair_hive_table(buckets)

        assert_true(mock_logging.error.called)
    def test_add_hive_partition(self, mock_logging):
        """Athena - Add Hive Partition

        Feeds S3 keys for three buckets to `add_hive_partition` and expects
        a truthy result together with an info log.
        """
        self.client.athena_client = MockAthenaClient(results=[
            {'Repair: added data to metastore:foobar'},
            {'Repair: added data to metastore:foobaz'},
        ])

        # Mapping of bucket name -> set of S3 object keys
        s3_keys_by_bucket = {
            'unit-testing.streamalerts': {
                'alerts/dt=2017-08-26-14/rule_name_alerts-1304134918401.json',
                'alerts/dt=2017-08-27-14/rule_name_alerts-1304134918401.json',
            },
            'unit-testing.streamalert.data': {
                'log_type_1/2017/08/26/14/test-data-11111-22222-33333.snappy',
                'log_type_2/2017/08/26/14/test-data-11111-22222-33333.snappy',
                'log_type_2/2017/08/26/15/test-data-11111-22222-33333.snappy',
                'log_type_2/2017/08/26/16/test-data-11111-22222-33333.snappy',
                'log_type_3/2017/08/26/14/test-data-11111-22222-33333.snappy',
                'log_type_1/2017/08/26/11/test-data-11111-22222-33333.snappy',
            },
            'test-bucket-with-data': {
                '2017/08/26/14/rule_name_alerts-1304134918401.json',
                '2017/08/28/14/rule_name_alerts-1304134918401.json',
                '2017/07/30/14/rule_name_alerts-1304134918401.json',
            },
        }
        result = self.client.add_hive_partition(s3_keys_by_bucket)

        assert_true(mock_logging.info.called)
        assert_true(result)
    def test_repair_hive_table(self, mock_logging):
        """Athena - Repair Hive Table

        Repairs a single bucket against a mock client seeded with one
        result row and expects an info log.
        """
        athena_stub = MockAthenaClient(results=[{'Status': 'SUCCEEDED'}])
        self.client.athena_client = athena_stub

        buckets = {'unit-testing.streamalerts'}
        self.client.repair_hive_table(buckets)

        assert_true(mock_logging.info.called)
    def test_check_table_exists_invalid(self, mock_logging):
        """Athena - Check Table Exists - Does Not Exist

        With no results from the mock client, `check_table_exists` is
        expected to return a falsy value and an info log is expected.
        """
        # `results=None` makes the mock client return no rows
        self.client.athena_client = MockAthenaClient(results=None)

        table_found = self.client.check_table_exists('unit-test')

        assert_false(table_found)
        assert_true(mock_logging.info.called)
    def test_run_athena_query_async(self):
        """Athena - Run Athena Query - Async Call

        Runs a query with the `async` flag set and expects the call to
        report success; the second element of the returned pair is ignored.
        """
        self.client.athena_client = MockAthenaClient(results=[])

        # `async` is a reserved keyword from Python 3.7 onwards, so writing
        # `async=True` literally is a SyntaxError there. Unpacking a dict
        # passes the very same keyword argument and parses on every version.
        query_success, _ = self.client.run_athena_query(
            query='SHOW DATABASES;', **{'async': True})

        assert_true(query_success)
Example #7
0
 def setup(self, boto_patch):
     """Setup the AthenaRefresher tests"""
     # Start the SQS mock before any SQS resource is requested
     self.mock_sqs = mock_sqs()
     self.mock_sqs.start()

     # Every client created through the patched boto module is the Athena mock
     boto_patch.client.return_value = MockAthenaClient()

     # Create the queue named after the 'unit-testing' prefix
     sqs_resource = boto3.resource('sqs')
     self.queue = sqs_resource.create_queue(
         QueueName=StreamAlertSQSClient.DEFAULT_QUEUE_NAME.format('unit-testing'))

     self._refresher = AthenaRefresher()
Example #8
0
    def test_run_athena_query(self):
        """Athena - Run Athena Query - Normal Result

        Runs a query against a default mock client and checks both the
        success flag and the rows found under ``ResultSet`` -> ``Rows``.
        """
        self.client.athena_client = MockAthenaClient()

        succeeded, response = self.client.run_athena_query(query='SHOW DATABASES;')

        expected_rows = [{'Data': [{'test': 'test'}]}]
        assert_true(succeeded)
        assert_equal(response['ResultSet']['Rows'], expected_rows)
    def test_check_table_exists(self):
        """Athena - Check Table Exists

        Besides the truthy return value, verifies that the client's
        `athena_results_key` is 'unit-testing/' followed by today's
        date formatted as YYYY/MM/DD.
        """
        self.client.athena_client = MockAthenaClient(results=[{'alerts': True}])

        table_found = self.client.check_table_exists('unit-test')
        assert_true(table_found)

        today = datetime.now().strftime('%Y/%m/%d')
        expected_key = 'unit-testing/{}'.format(today)
        assert_equal(self.client.athena_results_key, expected_key)
    def test_run_athena_query_error(self, mock_logging):
        """Athena - Run Athena Query - Error Result

        With the mock client configured with a FAILED result state, the query
        is expected to log an error and return a falsy flag with an empty
        dict of results.
        """
        failing_stub = MockAthenaClient(results=None, result_state='FAILED')
        self.client.athena_client = failing_stub

        succeeded, response = self.client.run_athena_query(query='SHOW DATABASES;')

        assert_true(mock_logging.error.called)
        assert_false(succeeded)
        assert_equal(response, {})
    def test_run_athena_query_empty(self, mock_logging):
        """Athena - Run Athena Query - Empty Result

        A query with no results is still expected to succeed, with an
        empty list of rows and a debug log.
        """
        # `results=None` makes the mock client return no rows
        self.client.athena_client = MockAthenaClient(results=None)

        succeeded, response = self.client.run_athena_query(query='SHOW DATABASES;')

        assert_true(succeeded)
        assert_equal(response['ResultSet']['Rows'], [])
        assert_true(mock_logging.debug.called)
Example #12
0
    def test_add_hive_partition_unknown_bucket(self, mock_logging):
        """Athena - Add Hive Partition - Unknown Bucket

        Keys that belong to a bucket named 'bucket-not-in-config.streamalerts'
        are expected to produce an error log and a falsy result.
        """
        self.client.athena_client = MockAthenaClient(results=[])

        s3_keys_by_bucket = {
            'bucket-not-in-config.streamalerts': {
                'alerts/dt=2017-08-26-14/rule_name_alerts-1304134918401.json',
                'alerts/dt=2017-08-27-14/rule_name_alerts-1304134918401.json',
            },
        }
        result = self.client.add_hive_partition(s3_keys_by_bucket)

        assert_true(mock_logging.error.called)
        assert_false(result)
    def test_add_hive_partition_unexpected_s3_key(self, mock_logging):
        """Athena - Add Hive Partition - Unexpected S3 Key

        S3 keys with an unexpected layout are expected to produce an
        error log and a falsy result.
        """
        self.client.athena_client = MockAthenaClient(results=[])

        s3_keys_by_bucket = {
            'unit-testing.streamalerts': {'a/pattern/that/does/not-match'},
            'test-bucket-with-data': {'another/pattern/that/does/not-match'},
        }
        result = self.client.add_hive_partition(s3_keys_by_bucket)

        assert_true(mock_logging.error.called)
        assert_false(result)
Example #14
0
    def test_rebuild_partitions():
        """CLI - Athena rebuild partitions helper

        Patches boto3 in `streamalert.shared.athena` so that the first two
        clients created are mocks: one seeded with partition rows, the next
        with an empty result. `handler.rebuild_partitions` is then expected
        to return a truthy value.
        """
        # Partition values, in the order the first mock client holds them
        partition_values = (
            'dt=2019-12-04-05',
            'dt=2019-12-03-22',
            'dt=2019-12-03-23',
            'dt=2019-12-03-20',
            'dt=2019-12-04-01',
        )
        show_partitions_rows = [
            {'Data': [{'VarCharValue': value}]} for value in partition_values
        ]

        with patch('streamalert.shared.athena.boto3') as mock_athena:
            # One mock client per successive `client(...)` call
            mock_athena.client.side_effect = [
                MockAthenaClient(results=show_partitions_rows),
                MockAthenaClient(results=[]),
            ]

            cli_config = MockCLIConfig(config=athena_cli_basic_config())

            assert_true(handler.rebuild_partitions('unit_my_test', 'bucket', cli_config))
def _mock_boto(name):
    """Hack to allow mocking boto3.client with moto and our own class

    Returns a `MockAthenaClient` for the 'athena' service and defers to
    `client(name)` for every other service name.
    """
    return MockAthenaClient() if name == 'athena' else client(name)
Example #16
0
 def setup(self, boto_patch):
     """Setup the AthenaRefresher tests"""
     # The patched boto module hands out the Athena mock for every client
     athena_stub = MockAthenaClient()
     boto_patch.client.return_value = athena_stub

     # Built only after the mock client is in place
     self._refresher = AthenaRefresher()
    def test_check_database_exists(self):
        """Athena - Check Database Exists

        With one result row available from the mock client, the database
        check is expected to return a truthy value.
        """
        athena_stub = MockAthenaClient(results=[{'streamalert': True}])
        self.client.athena_client = athena_stub

        database_found = self.client.check_database_exists()
        assert_true(database_found)
    def test_check_database_exists_invalid(self):
        """Athena - Check Database Exists - Does Not Exist

        With no results from the mock client, the database check is
        expected to return a falsy value.
        """
        # `results=None` makes the mock client return no rows
        self.client.athena_client = MockAthenaClient(results=None)

        database_found = self.client.check_database_exists()
        assert_false(database_found)
Example #19
0
 def setup(self, boto_patch):
     """Setup the AthenaPartitioner tests"""
     # The patched boto module hands out the Athena mock for every client
     athena_stub = MockAthenaClient()
     boto_patch.client.return_value = athena_stub

     # Built only after the mock client is in place
     self._partitioner = AthenaPartitioner()
Example #20
0
class TestStreamAlertAthenaClient(object):
    """Test class for StreamAlertAthenaClient"""

    @patch.dict(os.environ, {'AWS_DEFAULT_REGION': 'us-west-1'})
    @patch('boto3.client', Mock(side_effect=lambda c: MockAthenaClient()))
    def setup(self):
        """Setup the StreamAlertAthenaClient tests"""
        self._db_name = 'test_database'

        # The results bucket name is derived from the account prefix in the
        # unit test configuration
        account_prefix = load_config('tests/unit/conf/')['global']['account']['prefix']
        results_bucket = 's3://{}.streamalert.athena-results'.format(account_prefix)

        self.client = StreamAlertAthenaClient(
            self._db_name, results_bucket, 'unit-testing')

    @patch('stream_alert.shared.athena.datetime')
    def test_init_fix_bucket_path(self, date_mock):
        """Athena - Fix Bucket Path

        A results bucket given without the ``s3://`` scheme is expected to
        end up as a full S3 path suffixed with the prefix and current date.
        """
        # Pin "now" so the date portion of the expected path cannot drift
        frozen_now = datetime.utcnow()
        date_mock.utcnow.return_value = frozen_now

        expected_path = 's3://test.streamalert.athena-results/unit-testing/{}'.format(
            frozen_now.strftime('%Y/%m/%d'))

        with patch.dict(os.environ, {'AWS_DEFAULT_REGION': 'us-west-1'}):
            client = StreamAlertAthenaClient(
                self._db_name, 'test.streamalert.athena-results', 'unit-testing')
            assert_equal(client._s3_results_path, expected_path)

    def test_unique_values_from_query(self):
        """Athena - Unique Values from Query

        The 'barfoo' value appears twice in the rows but only once in the
        expected result.
        """
        row_values = ('foobar', 'barfoo', 'barfoo', 'foobarbaz')
        query = {
            'ResultSet': {
                'Rows': [{'Data': [{'VarCharValue': value}]} for value in row_values]
            }
        }

        unique_values = self.client._unique_values_from_query(query)
        assert_items_equal(unique_values, {'foobar', 'barfoo', 'foobarbaz'})

    def test_check_database_exists(self):
        """Athena - Check Database Exists"""
        # Single result row naming the database the client was built with
        self.client._client.results = [{'Data': [{'VarCharValue': self._db_name}]}]

        database_found = self.client.check_database_exists()
        assert_true(database_found)

    def test_check_database_exists_invalid(self):
        """Athena - Check Database Exists - Does Not Exist"""
        self.client._client.results = None

        database_found = self.client.check_database_exists()
        assert_false(database_found)

    def test_check_table_exists(self):
        """Athena - Check Table Exists"""
        self.client._client.results = [{'Data': [{'VarCharValue': 'test_table'}]}]

        table_found = self.client.check_table_exists('test_table')
        assert_true(table_found)

    def test_check_table_exists_invalid(self):
        """Athena - Check Table Exists - Does Not Exist"""
        self.client._client.results = None

        table_found = self.client.check_table_exists('test_table')
        assert_false(table_found)

    def test_get_table_partitions(self):
        """Athena - Get Table Partitions

        The 'dt=2018-12-09-10' partition appears twice in the rows but only
        once in the expected result.
        """
        partition_values = (
            'dt=2018-12-10-10',
            'dt=2018-12-09-10',
            'dt=2018-12-09-10',
            'dt=2018-12-11-10',
        )
        self.client._client.results = [
            {'Data': [{'VarCharValue': value}]} for value in partition_values
        ]

        partitions = self.client.get_table_partitions('test_table')

        assert_items_equal(
            partitions,
            {'dt=2018-12-10-10', 'dt=2018-12-09-10', 'dt=2018-12-11-10'})

    def test_get_table_partitions_error(self):
        """Athena - Get Table Partitions, Exception"""
        # With the mock client's `raise_exception` flag set, None is expected
        self.client._client.raise_exception = True

        partitions = self.client.get_table_partitions('test_table')
        assert_equal(partitions, None)

    def test_drop_table(self):
        """Athena - Drop Table, Success"""
        dropped = self.client.drop_table('test_table')
        assert_true(dropped)

    def test_drop_table_failure(self):
        """Athena - Drop Table, Failure"""
        self.client._client.raise_exception = True

        dropped = self.client.drop_table('test_table')
        assert_false(dropped)

    @patch('stream_alert.shared.athena.StreamAlertAthenaClient.drop_table')
    def test_drop_all_tables(self, drop_table_mock):
        """Athena - Drop All Tables, Success

        'table_02' is listed twice, yet `drop_table` is expected to be
        called only twice in total.
        """
        table_names = ('table_01', 'table_02', 'table_02')
        self.client._client.results = [
            {'Data': [{'VarCharValue': name}]} for name in table_names
        ]

        assert_true(self.client.drop_all_tables())
        assert_equal(drop_table_mock.call_count, 2)

    @patch('stream_alert.shared.athena.StreamAlertAthenaClient.drop_table')
    def test_drop_all_tables_failure(self, drop_table_mock):
        """Athena - Drop All Tables, Failure

        One of the three patched `drop_table` calls returns False, so the
        overall result is expected to be falsy.
        """
        table_names = ('table_01', 'table_02', 'table_03')
        self.client._client.results = [
            {'Data': [{'VarCharValue': name}]} for name in table_names
        ]
        drop_table_mock.side_effect = [True, True, False]

        assert_false(self.client.drop_all_tables())

    def test_drop_all_tables_exception(self):
        """Athena - Drop All Tables, Exception"""
        self.client._client.raise_exception = True

        all_dropped = self.client.drop_all_tables()
        assert_false(all_dropped)

    @patch('logging.Logger.exception')
    def test_execute_query(self, log_mock):
        """Athena - Execute Query

        With the mock client's `raise_exception` flag set, the failure is
        expected to be logged through `Logger.exception`.
        """
        self.client._client.raise_exception = True

        self.client._execute_query('BAD SQL')

        log_mock.assert_called_with('Athena query failed')

    def test_execute_and_wait(self):
        """Athena - Execute and Wait

        The returned value is expected to be present in the mock client's
        `query_executions`.
        """
        self.client._client.results = [{'Data': [{'VarCharValue': 'result'}]}]

        execution = self.client._execute_and_wait('SQL query')

        assert_true(execution in self.client._client.query_executions)

    @patch('logging.Logger.error')
    def test_execute_and_wait_failed(self, log_mock):
        """Athena - Execute and Wait, Failed

        With a FAILED result state, the failing query text is expected to
        be logged through `Logger.error`.
        """
        self.client._client.result_state = 'FAILED'
        sql = 'SQL query'

        self.client._execute_and_wait(sql)

        log_mock.assert_called_with('Athena query failed:\n%s', sql)

    def test_query_result_paginator(self):
        """Athena - Query Result Paginator"""
        row = {'Data': [{'VarCharValue': 'result'}]}
        self.client._client.results = [row]

        pages = list(self.client.query_result_paginator('test query'))

        # NOTE(review): four identical pages are expected; the page count is
        # presumably fixed by MockAthenaClient's paginator - confirm there
        expected_pages = [{'ResultSet': {'Rows': [row]}}] * 4
        assert_items_equal(pages, expected_pages)

    def test_query_result_paginator_error(self):
        """Athena - Query Result Paginator, Exception"""
        self.client._client.raise_exception = True

        pages = list(self.client.query_result_paginator('test query'))
        assert_equal(pages, [])

    def test_run_async_query(self):
        """Athena - Run Async Query, Success"""
        started = self.client.run_async_query('test query')
        assert_true(started)

    def test_run_async_query_failure(self):
        """Athena - Run Async Query, Failure"""
        self.client._client.raise_exception = True

        started = self.client.run_async_query('test query')
        assert_false(started)