def rebuild_partitions(table, bucket, config):
    """Rebuild an Athena table's partitions

    Steps:
      - Get the list of current partitions
      - Destroy existing table
      - Re-create tables
      - Re-create partitions

    Args:
        table (str): The name of the table being rebuilt
        bucket (str): The s3 bucket to be used as the location for Athena data
        config (CLIConfig): Loaded StreamAlert config

    Returns:
        bool: True if every partition statement ran successfully. False if the
            table has no partitions to rebuild, or if dropping the table,
            re-creating it, or any partition query failed.
    """
    sanitized_table_name = FirehoseClient.firehose_log_name(table)

    athena_client = get_athena_client(config)

    # Get the current set of partitions
    partitions = athena_client.get_table_partitions(sanitized_table_name)
    if not partitions:
        LOGGER.info('No partitions to rebuild for %s, nothing to do', sanitized_table_name)
        return False

    # Drop the table
    LOGGER.info('Dropping table %s', sanitized_table_name)
    if not athena_client.drop_table(sanitized_table_name):
        return False

    LOGGER.info('Creating table %s', sanitized_table_name)

    # Re-create the table with previous partitions
    if not create_table(table, bucket, config):
        return False

    # Materialize the statements up front: the helper may return a one-shot
    # iterator, and the failure path below needs the complete set, not just
    # whatever is left after the loop has consumed part of it.
    new_partitions_statements = list(
        helpers.add_partition_statements(partitions, bucket, sanitized_table_name)
    )

    LOGGER.info('Creating total %d new partitions for %s', len(partitions), sanitized_table_name)

    for part_number, statement in enumerate(new_partitions_statements, start=1):
        if not athena_client.run_query(query=statement):
            LOGGER.error('Error re-creating new partitions for %s', sanitized_table_name)
            # Hand over every statement (including the one that just failed)
            # NOTE(review): presumably these are persisted for a manual re-run - confirm
            write_partitions_statements(new_partitions_statements, sanitized_table_name)
            return False

        # Only report a part as rebuilt once its query has actually succeeded
        LOGGER.info('Rebuilt partitions part %d', part_number)

    LOGGER.info('Successfully rebuilt all partitions for %s', sanitized_table_name)
    return True
def test_add_partition_statements_exceed_length():
    """CLI - Athena Add Partition Statements, split when exceeding max query length

    Feeds four partitions to helpers.add_partition_statements and expects two
    ALTER TABLE statements back, each covering two partitions in ascending
    date order.

    NOTE(review): the query-length limit that forces the split is not set in
    this function; presumably it is configured elsewhere - confirm.
    """
    partition_names = {
        'dt=2017-12-01-01',
        'dt=2016-12-01-02',
        'dt=2018-12-01-05',
        'dt=2013-12-01-04',
    }

    # Collect the helper's output into a list so it can be sized and indexed
    statements = list(helpers.add_partition_statements(partition_names, 'bucket', 'test'))
    assert_equal(len(statements), 2)

    expected_statements = [
        (
            "ALTER TABLE test ADD IF NOT EXISTS "
            "PARTITION (dt = '2013-12-01-04') "
            "LOCATION 's3://bucket/test/2013/12/01/04' "
            "PARTITION (dt = '2016-12-01-02') "
            "LOCATION 's3://bucket/test/2016/12/01/02'"
        ),
        (
            "ALTER TABLE test ADD IF NOT EXISTS "
            "PARTITION (dt = '2017-12-01-01') "
            "LOCATION 's3://bucket/test/2017/12/01/01' "
            "PARTITION (dt = '2018-12-01-05') "
            "LOCATION 's3://bucket/test/2018/12/01/05'"
        ),
    ]

    # Compare statement-by-statement, in order
    for position, expected in enumerate(expected_statements):
        assert_equal(statements[position], expected)