Exemple #1
0
def rebuild_partitions(table, bucket, config):
    """Rebuild an Athena table's partitions

    Steps:
      - Get the list of current partitions
      - Destroy existing table
      - Re-create tables
      - Re-create partitions

    Args:
        table (str): The name of the table being rebuilt
        bucket (str): The s3 bucket to be used as the location for Athena data
        config (CLIConfig): Loaded StreamAlert config

    Returns:
        bool: False if the table has no partitions to rebuild or if any step
            (drop, create, add partitions) fails, True otherwise
    """
    sanitized_table_name = FirehoseClient.firehose_log_name(table)

    athena_client = get_athena_client(config)

    # Get the current set of partitions
    partitions = athena_client.get_table_partitions(sanitized_table_name)
    if not partitions:
        LOGGER.info('No partitions to rebuild for %s, nothing to do',
                    sanitized_table_name)
        return False

    # Drop the table
    LOGGER.info('Dropping table %s', sanitized_table_name)
    if not athena_client.drop_table(sanitized_table_name):
        return False

    LOGGER.info('Creating table %s', sanitized_table_name)

    # Re-create the table with previous partitions
    if not create_table(table, bucket, config):
        return False

    # Materialize the statements up front: if the helper hands back a one-shot
    # iterator, a partially consumed iterator passed to
    # write_partitions_statements on failure would silently omit the failed
    # statement and everything before it.
    new_partitions_statements = list(helpers.add_partition_statements(
        partitions, bucket, sanitized_table_name))

    LOGGER.info('Creating total %d new partitions for %s', len(partitions),
                sanitized_table_name)

    for part_number, statement in enumerate(new_partitions_statements, start=1):
        if not athena_client.run_query(query=statement):
            LOGGER.error('Error re-creating new partitions for %s',
                         sanitized_table_name)
            # The table was already dropped at this point, so hand the full
            # statement set to write_partitions_statements rather than lose it
            write_partitions_statements(new_partitions_statements,
                                        sanitized_table_name)
            return False

        # Only report progress for batches that actually succeeded
        LOGGER.info('Rebuilt partitions part %d', part_number)

    LOGGER.info('Successfully rebuilt all partitions for %s',
                sanitized_table_name)
    return True
Exemple #2
0
def test_add_partition_statements_exceed_length():
    """CLI - Athena Add Partition Statement when statement exceed max query length"""
    # Input is an unordered set; the expected output below lists the
    # partitions in ascending dt order, two per ALTER TABLE statement.
    # NOTE(review): whatever constrains the max query length so that four
    # partitions split into two statements is not visible in this block.
    unordered_partitions = {
        'dt=2013-12-01-04',
        'dt=2016-12-01-02',
        'dt=2017-12-01-01',
        'dt=2018-12-01-05',
    }

    expected_statements = [
        ("ALTER TABLE test ADD IF NOT EXISTS "
         "PARTITION (dt = '2013-12-01-04') "
         "LOCATION 's3://bucket/test/2013/12/01/04' "
         "PARTITION (dt = '2016-12-01-02') "
         "LOCATION 's3://bucket/test/2016/12/01/02'"),
        ("ALTER TABLE test ADD IF NOT EXISTS "
         "PARTITION (dt = '2017-12-01-01') "
         "LOCATION 's3://bucket/test/2017/12/01/01' "
         "PARTITION (dt = '2018-12-01-05') "
         "LOCATION 's3://bucket/test/2018/12/01/05'"),
    ]

    # Copy into a list so the result can be measured and indexed
    statements = list(
        helpers.add_partition_statements(unordered_partitions, 'bucket', 'test'))

    # Check the statement count first, then each statement in order
    assert_equal(len(statements), 2)
    for actual, expected in zip(statements, expected_statements):
        assert_equal(actual, expected)