# NOTE: stdlib/third-party imports added for completeness. The DIRBS-internal helpers
# referenced below (common, utils, metadata, partition_utils/part_utils) are assumed to
# be imported from the surrounding DIRBS Core modules; their exact import paths are not
# shown in these fragments.
import datetime
import logging

from dateutil import relativedelta
from psycopg2 import sql


def repartition(ctx, num_physical_shards):
    """Repartition DIRBS Core tables into a new number of physical IMEI shards."""
    logger = logging.getLogger('dirbs.db')
    config = common.ensure_config(ctx)
    with utils.create_db_connection(config.db_config) as conn, conn.cursor() as cursor:
        logger.info('Repartitioning DB schema in DB %s on host %s into %d physical shards...',
                    config.db_config.database, config.db_config.host, num_physical_shards)

        logger.info('Re-partitioning classification_state table...')
        partition_utils.repartition_classification_state(conn, num_physical_shards=num_physical_shards)
        logger.info('Re-partitioned classification_state table')

        logger.info('Re-partitioning registration_list table...')
        partition_utils.repartition_registration_list(conn, num_physical_shards=num_physical_shards)
        logger.info('Re-partitioned registration_list table')

        logger.info('Re-partitioning stolen_list table...')
        partition_utils.repartition_stolen_list(conn, num_physical_shards=num_physical_shards)
        logger.info('Re-partitioned stolen_list table')

        logger.info('Re-partitioning pairing_list table...')
        partition_utils.repartition_pairing_list(conn, num_physical_shards=num_physical_shards)
        logger.info('Re-partitioned pairing_list table')

        logger.info('Re-partitioning blacklist table...')
        partition_utils.repartition_blacklist(conn, num_physical_shards=num_physical_shards)
        logger.info('Re-partitioned blacklist table')

        logger.info('Re-partitioning notifications_lists table...')
        partition_utils.repartition_notifications_lists(conn, num_physical_shards=num_physical_shards)
        logger.info('Re-partitioned notifications_lists table')

        logger.info('Re-partitioning exceptions_lists table...')
        partition_utils.repartition_exceptions_lists(conn, num_physical_shards=num_physical_shards)
        logger.info('Re-partitioned exceptions_lists table')

        logger.info('Re-partitioning network_imeis table...')
        partition_utils.repartition_network_imeis(conn, num_physical_shards=num_physical_shards)
        logger.info('Re-partitioned network_imeis table')

        logger.info('Re-partitioning monthly_network_triplets tables...')
        partition_utils.repartition_monthly_network_triplets(conn, num_physical_shards=num_physical_shards)
        logger.info('Re-partitioned monthly_network_triplets tables')

        # Update schema metadata table
        cursor.execute('UPDATE schema_metadata SET phys_shards = %s', [num_physical_shards])
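# Illustrative sketch (an assumption, not DIRBS Core code): a Python rendering of the
# calc_virt_imei_shard() SQL function created by the migration below, to make the
# sharding scheme concrete. Characters 13-14 of the IMEI select one of 100 virtual
# shards (0-99), with 0 as the fallback whenever those characters do not parse as an
# integer. Repartitioning only changes how the 100 virtual shards are grouped into
# physical shards, so a row's virtual shard never changes.
def _virt_imei_shard(imei):
    """Hypothetical Python equivalent of the calc_virt_imei_shard() SQL function."""
    try:
        return int((imei or '')[12:14])  # SUBSTRING(imei, 13, 2)::SMALLINT
    except ValueError:
        return 0


# Example: _virt_imei_shard('12345678901234') == 34; _virt_imei_shard(None) == 0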
def upgrade(self, db_conn):  # noqa: C901
    """Overrides AbstractMigrator upgrade method."""
    logger = logging.getLogger('dirbs.db')
    with db_conn.cursor() as cursor:
        # Map characters 13-14 of an IMEI to a virtual shard in [0, 99], falling back to 0
        cursor.execute("""CREATE FUNCTION calc_virt_imei_shard(imei TEXT) RETURNS SMALLINT
                          LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE AS $$
                          BEGIN
                              RETURN SUBSTRING(COALESCE(imei, ''), 13, 2)::SMALLINT;
                          EXCEPTION WHEN OTHERS THEN
                              RETURN 0;
                          END;
                          $$""")

        # By default, create 4 shards
        num_initial_shards = 4

        logger.info('Re-partitioning classification_state table...')
        cursor.execute('ALTER TABLE classification_state ADD COLUMN virt_imei_shard SMALLINT')
        cursor.execute('UPDATE classification_state SET virt_imei_shard = calc_virt_imei_shard(imei_norm)')
        cursor.execute('ALTER TABLE classification_state ALTER COLUMN virt_imei_shard SET NOT NULL')
        part_utils.repartition_classification_state(db_conn, num_physical_shards=num_initial_shards)
        logger.info('Re-partitioned classification_state table')

        logger.info('Re-partitioning registration_list table...')
        cursor.execute('ALTER TABLE historic_registration_list ADD COLUMN virt_imei_shard SMALLINT')
        cursor.execute('UPDATE historic_registration_list SET virt_imei_shard = calc_virt_imei_shard(imei_norm)')
        cursor.execute('ALTER TABLE historic_registration_list ALTER COLUMN virt_imei_shard SET NOT NULL')
        self.partition_registration_list(db_conn, num_physical_shards=num_initial_shards)
        logger.info('Re-partitioned registration_list table')

        logger.info('Re-partitioning pairing_list table...')
        cursor.execute('ALTER TABLE historic_pairing_list ADD COLUMN virt_imei_shard SMALLINT')
        cursor.execute('UPDATE historic_pairing_list SET virt_imei_shard = calc_virt_imei_shard(imei_norm)')
        cursor.execute('ALTER TABLE historic_pairing_list ALTER COLUMN virt_imei_shard SET NOT NULL')
        part_utils.repartition_pairing_list(db_conn, num_physical_shards=num_initial_shards)
        logger.info('Re-partitioned pairing_list table')

        logger.info('Re-partitioning blacklist table...')
        cursor.execute('ALTER TABLE blacklist ADD COLUMN virt_imei_shard SMALLINT')
        cursor.execute('UPDATE blacklist SET virt_imei_shard = calc_virt_imei_shard(imei_norm)')
        cursor.execute('ALTER TABLE blacklist ALTER COLUMN virt_imei_shard SET NOT NULL')
        part_utils.repartition_blacklist(db_conn, num_physical_shards=num_initial_shards)
        logger.info('Re-partitioned blacklist table')

        # Need to make sure the owner of the list tables is dirbs_core_listgen
        logger.info('Re-partitioning notifications_lists table...')
        # The original notifications_lists were not created with a single sequence for the IDs, so do that now
        with utils.db_role_setter(db_conn, role_name='dirbs_core_listgen'):
            cursor.execute("""CREATE UNLOGGED TABLE notifications_lists_new (
                                  row_id BIGSERIAL NOT NULL,
                                  operator_id TEXT NOT NULL,
                                  imei_norm TEXT NOT NULL,
                                  imsi TEXT NOT NULL,
                                  msisdn TEXT NOT NULL,
                                  block_date DATE NOT NULL,
                                  reasons TEXT[] NOT NULL,
                                  amnesty_granted BOOLEAN DEFAULT FALSE NOT NULL,
                                  start_run_id BIGINT NOT NULL,
                                  end_run_id BIGINT,
                                  delta_reason TEXT NOT NULL
                                      CHECK (delta_reason IN ('new', 'resolved', 'blacklisted',
                                                              'no_longer_seen', 'changed')),
                                  virt_imei_shard SMALLINT NOT NULL
                              ) PARTITION BY LIST (operator_id)
                           """)

        # Work out who the operators are
        partitions = utils.child_table_names(db_conn, 'notifications_lists')

        # Make sure that they are owned by dirbs_core_listgen (they can be owned by
        # dirbs_core_power_user due to bad previous migration scripts)
        with utils.db_role_setter(db_conn, role_name='dirbs_core_power_user'):
            for p in partitions:
                cursor.execute(sql.SQL('ALTER TABLE {0} OWNER TO dirbs_core_listgen')
                               .format(sql.Identifier(p)))

        operators = [x.operator_id
                     for x in utils.table_invariants_list(db_conn, partitions, ['operator_id'])]

        # Create operator child partitions
        for op_id in operators:
            tbl_name = part_utils.per_mno_lists_partition(operator_id=op_id, suffix='_new',
                                                          list_type='notifications')
            part_utils.create_per_mno_lists_partition(db_conn, operator_id=op_id,
                                                      parent_tbl_name='notifications_lists_new',
                                                      tbl_name=tbl_name, num_physical_shards=1,
                                                      unlogged=True, fillfactor=100)

        cursor.execute("""INSERT INTO notifications_lists_new(operator_id, imei_norm, imsi, msisdn,
                                                              block_date, reasons, start_run_id,
                                                              end_run_id, delta_reason, virt_imei_shard)
                               SELECT operator_id, imei_norm, imsi, msisdn, block_date, reasons,
                                      start_run_id, end_run_id, delta_reason,
                                      calc_virt_imei_shard(imei_norm)
                                 FROM notifications_lists
                       """)

        # Drop old table, rename tables, indexes and constraints
        cursor.execute("""ALTER TABLE notifications_lists_new
                          RENAME CONSTRAINT notifications_lists_new_delta_reason_check
                              TO notifications_lists_delta_reason_check""")
        cursor.execute('DROP TABLE notifications_lists CASCADE')
        cursor.execute("""ALTER SEQUENCE notifications_lists_new_row_id_seq
                          RENAME TO notifications_lists_row_id_seq""")
        part_utils.rename_table_and_indices(db_conn, old_tbl_name='notifications_lists_new',
                                            new_tbl_name='notifications_lists')
        part_utils.repartition_notifications_lists(db_conn, num_physical_shards=num_initial_shards)
        logger.info('Re-partitioned notifications_lists table')

        logger.info('Re-partitioning exceptions_lists table...')
        # The original exceptions_lists were not created with a single sequence for the IDs, so do that now
        with utils.db_role_setter(db_conn, role_name='dirbs_core_listgen'):
            cursor.execute("""CREATE UNLOGGED TABLE exceptions_lists_new (
                                  row_id BIGSERIAL NOT NULL,
                                  operator_id TEXT NOT NULL,
                                  imei_norm TEXT NOT NULL,
                                  imsi TEXT NOT NULL,
                                  start_run_id BIGINT NOT NULL,
                                  end_run_id BIGINT,
                                  delta_reason TEXT NOT NULL
                                      CHECK (delta_reason IN ('added', 'removed')),
                                  virt_imei_shard SMALLINT NOT NULL
                              ) PARTITION BY LIST (operator_id)
                           """)

        # Work out who the operators are
        partitions = utils.child_table_names(db_conn, 'exceptions_lists')

        # Make sure that they are owned by dirbs_core_listgen (they can be owned by
        # dirbs_core_power_user due to bad previous migration scripts)
        with utils.db_role_setter(db_conn, role_name='dirbs_core_power_user'):
            for p in partitions:
                cursor.execute(sql.SQL('ALTER TABLE {0} OWNER TO dirbs_core_listgen')
                               .format(sql.Identifier(p)))

        operators = [x.operator_id
                     for x in utils.table_invariants_list(db_conn, partitions, ['operator_id'])]

        # Create operator child partitions
        for op_id in operators:
            tbl_name = part_utils.per_mno_lists_partition(operator_id=op_id, suffix='_new',
                                                          list_type='exceptions')
            part_utils.create_per_mno_lists_partition(db_conn, operator_id=op_id,
                                                      parent_tbl_name='exceptions_lists_new',
                                                      tbl_name=tbl_name, num_physical_shards=1,
                                                      unlogged=True, fillfactor=100)

        cursor.execute("""INSERT INTO exceptions_lists_new(operator_id, imei_norm, imsi,
                                                           start_run_id, end_run_id, delta_reason,
                                                           virt_imei_shard)
                               SELECT operator_id, imei_norm, imsi, start_run_id, end_run_id,
                                      delta_reason, calc_virt_imei_shard(imei_norm)
                                 FROM exceptions_lists
                       """)

        # Drop old table, rename tables, indexes and constraints
        cursor.execute("""ALTER TABLE exceptions_lists_new
                          RENAME CONSTRAINT exceptions_lists_new_delta_reason_check
                              TO exceptions_lists_delta_reason_check""")
        cursor.execute('DROP TABLE exceptions_lists CASCADE')
        cursor.execute('ALTER SEQUENCE exceptions_lists_new_row_id_seq RENAME TO exceptions_lists_row_id_seq')
        part_utils.rename_table_and_indices(db_conn, old_tbl_name='exceptions_lists_new',
                                            new_tbl_name='exceptions_lists')
        part_utils.repartition_exceptions_lists(db_conn, num_physical_shards=num_initial_shards)
        logger.info('Re-partitioned exceptions_lists table')

        logger.info('Re-partitioning seen_imeis (network_imeis) table...')
        # First, just put everything in a temporary table so that we can call partition_utils
        with utils.db_role_setter(db_conn, role_name='dirbs_core_import_operator'):
            cursor.execute("""CREATE UNLOGGED TABLE network_imeis (
                                  first_seen DATE NOT NULL,
                                  last_seen DATE NOT NULL,
                                  seen_rat_bitmask INTEGER,
                                  imei_norm TEXT NOT NULL,
                                  virt_imei_shard SMALLINT NOT NULL
                              )
                           """)

        #
        # We disable index scans here as doing a merge append with index scans is much slower and
        # involves a lot of seeks, which kills performance on non-SSD drives. Better to use an
        # append plan and sort the results by imei_norm.
        #
        cursor.execute('SET enable_indexscan = false')
        cursor.execute("""INSERT INTO network_imeis
                               SELECT MIN(first_seen), MAX(last_seen), bit_or(seen_rat_bitmask),
                                      imei_norm, calc_virt_imei_shard(imei_norm)
                                 FROM seen_imeis
                             GROUP BY imei_norm
                       """)
        cursor.execute('SET enable_indexscan = true')
        part_utils.repartition_network_imeis(db_conn, num_physical_shards=num_initial_shards)
        cursor.execute('DROP TABLE seen_imeis CASCADE')
        logger.info('Re-partitioned seen_imeis (network_imeis) table')

        # First, just put all country-level triplets in a temporary table so that we can call partition_utils
        with utils.db_role_setter(db_conn, role_name='dirbs_core_import_operator'):
            cursor.execute("""CREATE UNLOGGED TABLE monthly_network_triplets_country (
                                  triplet_year SMALLINT NOT NULL,
                                  triplet_month SMALLINT NOT NULL,
                                  first_seen DATE NOT NULL,
                                  last_seen DATE NOT NULL,
                                  date_bitmask INTEGER NOT NULL,
                                  triplet_hash UUID NOT NULL,
                                  imei_norm TEXT,
                                  imsi TEXT,
                                  msisdn TEXT,
                                  virt_imei_shard SMALLINT NOT NULL,
                                  CHECK (last_seen >= first_seen),
                                  CHECK (EXTRACT(month FROM last_seen) = triplet_month
                                         AND EXTRACT(year FROM last_seen) = triplet_year),
                                  CHECK (EXTRACT(month FROM first_seen) = triplet_month
                                         AND EXTRACT(year FROM first_seen) = triplet_year)
                              ) PARTITION BY RANGE (triplet_year, triplet_month)
                           """)

        # Work out what partitions to create and create them
        partitions = utils.child_table_names(db_conn, 'seen_triplets')

        # Make sure that they are owned by dirbs_core_import_operator (they can be owned by
        # dirbs_core_power_user due to bad previous migration scripts)
        with utils.db_role_setter(db_conn, role_name='dirbs_core_power_user'):
            for p in partitions:
                cursor.execute(sql.SQL('ALTER TABLE {0} OWNER TO dirbs_core_import_operator')
                               .format(sql.Identifier(p)))

        year_month_tuples = {(x.triplet_year, x.triplet_month)
                             for x in utils.table_invariants_list(db_conn, partitions,
                                                                  ['triplet_year', 'triplet_month'])}

        for year, month in year_month_tuples:
            part_utils.create_monthly_network_triplets_country_partition(db_conn, month=month,
                                                                         year=year,
                                                                         num_physical_shards=1)

        with utils.db_role_setter(db_conn, role_name='dirbs_core_import_operator'):
            cursor.execute("""CREATE UNLOGGED TABLE monthly_network_triplets_per_mno (
                                  LIKE monthly_network_triplets_country INCLUDING ALL,
                                  operator_id TEXT NOT NULL
                              ) PARTITION BY LIST (operator_id)
                           """)

        # Work out what partitions to create and create them
        op_year_month_tuples = {(x.operator_id, x.triplet_year, x.triplet_month)
                                for x in utils.table_invariants_list(db_conn, partitions,
                                                                     ['operator_id', 'triplet_year',
                                                                      'triplet_month'])}

        # Create child partitions at per-MNO level
        for op, year, month in op_year_month_tuples:
            part_utils.create_monthly_network_triplets_per_mno_partition(db_conn, operator_id=op,
                                                                         month=month, year=year,
                                                                         num_physical_shards=1)

        # Populate the temporary monthly_network_triplets_per_mno table
        for year, month in year_month_tuples:
            logger.info('Generating temporary monthly_network_triplets_per_mno entries for '
                        '{0:02d}/{1:d}...'.format(month, year))
            cursor.execute("""INSERT INTO monthly_network_triplets_per_mno
                                   SELECT %(year)s, %(month)s, first_seen, last_seen, date_bitmask,
                                          triplet_hash, imei_norm, imsi, msisdn,
                                          calc_virt_imei_shard(imei_norm), operator_id
                                     FROM seen_triplets
                                    WHERE triplet_year = %(year)s
                                      AND triplet_month = %(month)s
                           """, {'year': year, 'month': month})
            logger.info('Generated temporary monthly_network_triplets_per_mno entries for '
                        '{0:02d}/{1:d}'.format(month, year))

        # Populate the temporary monthly_network_triplets_country table. We need to do this monthly
        # as we need to aggregate by triplets on a monthly basis.
        #
        # We disable index scans here as doing a merge append with index scans is much slower and
        # involves a lot of seeks, which kills performance on non-SSD drives. Better to use an
        # append plan and sort the results by imei_norm.
        #
        cursor.execute('SET enable_indexscan = false')
        for year, month in year_month_tuples:
            logger.info('Generating temporary monthly_network_triplets_country entries for '
                        '{0:02d}/{1:d}...'.format(month, year))
            cursor.execute("""INSERT INTO monthly_network_triplets_country
                                   SELECT %(year)s, %(month)s, MIN(first_seen), MAX(last_seen),
                                          bit_or(date_bitmask), triplet_hash, FIRST(imei_norm),
                                          FIRST(imsi), FIRST(msisdn),
                                          calc_virt_imei_shard(FIRST(imei_norm))
                                     FROM seen_triplets
                                    WHERE triplet_year = %(year)s
                                      AND triplet_month = %(month)s
                                 GROUP BY triplet_hash
                           """, {'year': year, 'month': month})
            logger.info('Generated temporary monthly_network_triplets_country entries for '
                        '{0:02d}/{1:d}'.format(month, year))
        cursor.execute('SET enable_indexscan = true')

        logger.info('Re-partitioning temporary monthly_network_triplets tables...')
        # Previously, the operator_data view was owned by dirbs_core_power_user but is now owned by
        # dirbs_core_import_operator, since it must be re-created
        with utils.db_role_setter(db_conn, role_name='dirbs_core_power_user'):
            cursor.execute('ALTER VIEW operator_data OWNER TO dirbs_core_import_operator')
        part_utils.repartition_monthly_network_triplets(db_conn, num_physical_shards=num_initial_shards)
        cursor.execute('DROP TABLE seen_triplets CASCADE')
        logger.info('Re-partitioned temporary monthly_network_triplets tables')

        # Replace list generation functions to include virt_imei_shard
        cursor.execute("""
            DROP FUNCTION gen_blacklist(run_id BIGINT);
            DROP FUNCTION gen_notifications_list(op_id TEXT, run_id BIGINT);
            DROP FUNCTION gen_exceptions_list(op_id TEXT, run_id BIGINT);

            --
            -- Create function to generate a full blacklist for a given run_id. A value of -1 means
            -- get the latest list.
            --
            CREATE FUNCTION gen_blacklist(run_id BIGINT = -1)
                RETURNS TABLE (
                    imei_norm       TEXT,
                    virt_imei_shard SMALLINT,
                    block_date      DATE,
                    reasons         TEXT[]
                )
                LANGUAGE plpgsql STRICT STABLE PARALLEL SAFE
                AS $$
            DECLARE
                query_run_id BIGINT;
            BEGIN
                --
                -- If we don't specify a run_id, just set to the maximum run_id, which will always
                -- return all rows where end_run_id is NULL
                --
                IF run_id = -1 THEN
                    run_id := max_bigint();
                END IF;

                RETURN QUERY SELECT bl.imei_norm,
                                    bl.virt_imei_shard,
                                    bl.block_date,
                                    bl.reasons
                               FROM blacklist bl
                              WHERE bl.delta_reason != 'unblocked'
                                AND run_id >= bl.start_run_id
                                AND (run_id < bl.end_run_id OR bl.end_run_id IS NULL);
            END
            $$;

            --
            -- Create function to generate a full notifications_list for a given run_id and
            -- operator ID. A value of -1 means get the latest list.
            --
            CREATE FUNCTION gen_notifications_list(op_id TEXT, run_id BIGINT = -1)
                RETURNS TABLE (
                    imei_norm       TEXT,
                    virt_imei_shard SMALLINT,
                    imsi            TEXT,
                    msisdn          TEXT,
                    block_date      DATE,
                    reasons         TEXT[],
                    amnesty_granted BOOLEAN
                )
                LANGUAGE plpgsql STRICT STABLE PARALLEL SAFE
                AS $$
            BEGIN
                --
                -- If we don't specify a run_id, just set to the maximum run_id, which will always
                -- return all rows where end_run_id is NULL
                --
                IF run_id = -1 THEN
                    run_id := max_bigint();
                END IF;

                RETURN QUERY SELECT nl.imei_norm,
                                    nl.virt_imei_shard,
                                    nl.imsi,
                                    nl.msisdn,
                                    nl.block_date,
                                    nl.reasons,
                                    nl.amnesty_granted
                               FROM notifications_lists nl
                              WHERE nl.operator_id = op_id
                                AND nl.delta_reason NOT IN ('resolved', 'blacklisted')
                                AND run_id >= nl.start_run_id
                                AND (run_id < nl.end_run_id OR nl.end_run_id IS NULL);
            END
            $$;

            --
            -- Create function to generate a full exceptions_list for a given run_id and operator
            -- ID. A value of -1 means get the latest list.
            --
            CREATE FUNCTION gen_exceptions_list(op_id TEXT, run_id BIGINT = -1)
                RETURNS TABLE (
                    imei_norm       TEXT,
                    virt_imei_shard SMALLINT,
                    imsi            TEXT
                )
                LANGUAGE plpgsql STRICT STABLE PARALLEL SAFE
                AS $$
            BEGIN
                --
                -- If we don't specify a run_id, just set to the maximum run_id, which will always
                -- return all rows where end_run_id is NULL
                --
                IF run_id = -1 THEN
                    run_id := max_bigint();
                END IF;

                RETURN QUERY SELECT el.imei_norm,
                                    el.virt_imei_shard,
                                    el.imsi
                               FROM exceptions_lists el
                              WHERE el.operator_id = op_id
                                AND el.delta_reason != 'removed'
                                AND run_id >= el.start_run_id
                                AND (run_id < el.end_run_id OR el.end_run_id IS NULL);
            END
            $$;
        """)  # noqa: Q440, Q441

        # Update schema metadata table
        cursor.execute("""ALTER TABLE schema_metadata ADD COLUMN phys_shards SMALLINT NOT NULL
                          DEFAULT %s CHECK (phys_shards > 0 AND phys_shards <= 100)""",
                       [num_initial_shards])
        cursor.execute('ALTER TABLE schema_metadata ALTER COLUMN phys_shards DROP DEFAULT')

        # Drop obsolete columns
        cursor.execute('ALTER TABLE schema_metadata DROP COLUMN potential_whitespace_imsis_msisdns')
        cursor.execute('ALTER TABLE report_monthly_stats DROP COLUMN num_whitespace_imsi_records')
        cursor.execute('ALTER TABLE report_monthly_stats DROP COLUMN num_whitespace_msisdn_records')
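# Usage sketch (an assumption for illustration, not part of the migration above): once
# the list generation functions have been re-created, callers query them as before, with
# run_id defaulting to -1 to select the latest list. The helper name and connection
# handling here are hypothetical; the column list matches the RETURNS TABLE clause above.
def fetch_latest_blacklist(db_conn):
    """Hypothetical helper that pulls the latest blacklist via gen_blacklist()."""
    with db_conn.cursor() as cursor:
        cursor.execute('SELECT imei_norm, virt_imei_shard, block_date, reasons '
                       'FROM gen_blacklist()')
        return cursor.fetchall()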
def lists(ctx, config, statsd, logger, run_id, conn, metadata_conn, command, metrics_root,
          metrics_run_root):
    """Prune obsolete lists data."""
    curr_date = ctx.obj['CURR_DATE']

    # Store metadata
    metadata.add_optional_job_metadata(metadata_conn, command, run_id,
                                       retention_months=config.retention_config.months_retention)

    logger.info('Pruning lists tables to remove any obsolete data with end_time outside the '
                'retention window...')
    retention_months = config.retention_config.months_retention
    if curr_date is None:
        curr_date = datetime.date.today()
    first_month_to_drop = datetime.date(curr_date.year, curr_date.month, 1) - \
        relativedelta.relativedelta(months=retention_months)
    logger.info('Lists data with end_time earlier than {0} will be pruned'.format(first_month_to_drop))

    with utils.db_role_setter(conn, role_name='dirbs_core_power_user'), conn.cursor() as cursor:
        logger.debug('Calculating original number of rows in lists tables...')
        # Each subquery is aliased as a table, so each selected column is a composite row value
        # that psycopg2 returns as a string like '(123)' -- hence the strip('()') parsing below
        row_count_sql = sql.SQL("""SELECT blacklist_row_count, noft_lists_row_count, excp_lists_row_count
                                     FROM (SELECT COUNT(*) FROM blacklist) AS blacklist_row_count,
                                          (SELECT COUNT(*) FROM notifications_lists) AS noft_lists_row_count,
                                          (SELECT COUNT(*) FROM exceptions_lists) AS excp_lists_row_count""")
        cursor.execute(row_count_sql)
        rows_before = cursor.fetchone()
        blacklist_rows_before = int(rows_before.blacklist_row_count.strip('()'))
        notflist_rows_before = int(rows_before.noft_lists_row_count.strip('()'))
        excplist_rows_before = int(rows_before.excp_lists_row_count.strip('()'))
        rows_before = blacklist_rows_before + notflist_rows_before + excplist_rows_before
        logger.debug('Calculated original number of rows in lists tables')
        statsd.gauge('{0}blacklist_rows_before'.format(metrics_run_root), blacklist_rows_before)
        statsd.gauge('{0}notifications_lists_rows_before'.format(metrics_run_root),
                     notflist_rows_before)
        statsd.gauge('{0}exceptions_lists_rows_before'.format(metrics_run_root),
                     excplist_rows_before)
        metadata.add_optional_job_metadata(metadata_conn, command, run_id,
                                           blacklist_rows_before=blacklist_rows_before,
                                           notifications_lists_rows_before=notflist_rows_before,
                                           exceptions_lists_rows_before=excplist_rows_before)

        # Calculate number of rows in the lists tables outside the retention window
        job_metadata_filter_sql = """SELECT run_id
                                       FROM job_metadata
                                      WHERE command = 'dirbs-listgen'
                                        AND end_time < '{0}'""".format(first_month_to_drop)

        cursor.execute(sql.SQL("""SELECT COUNT(*)
                                    FROM blacklist
                                   WHERE start_run_id IN ({0})""".format(job_metadata_filter_sql)))
        total_bl_rows_out_window_to_prune = cursor.fetchone()[0]
        logger.info('Found {0:d} rows of blacklist table outside the retention window to prune'
                    .format(total_bl_rows_out_window_to_prune))

        cursor.execute(sql.SQL("""SELECT COUNT(*)
                                    FROM notifications_lists
                                   WHERE start_run_id IN ({0})""".format(job_metadata_filter_sql)))
        total_nl_rows_out_window_to_prune = cursor.fetchone()[0]
        logger.info('Found {0:d} rows of notifications lists table outside the retention window to prune'
                    .format(total_nl_rows_out_window_to_prune))

        cursor.execute(sql.SQL("""SELECT COUNT(*)
                                    FROM exceptions_lists
                                   WHERE start_run_id IN ({0})""".format(job_metadata_filter_sql)))
        total_el_rows_out_window_to_prune = cursor.fetchone()[0]
        logger.info('Found {0:d} rows of exceptions lists table outside the retention window to prune'
                    .format(total_el_rows_out_window_to_prune))

        # We repartition the tables to re-create them, passing a condition SQL fragment that
        # filters out the rows being pruned
        logger.debug('Re-creating blacklist table...')
        num_phys_imei_shards = partition_utils.num_physical_imei_shards(conn)
        src_filter_sql = cursor.mogrify("""WHERE start_run_id NOT IN ({0})"""
                                        .format(job_metadata_filter_sql))
        partition_utils.repartition_blacklist(conn, num_physical_shards=num_phys_imei_shards,
                                              src_filter_sql=str(src_filter_sql,
                                                                 encoding=conn.encoding))
        logger.debug('Re-created blacklist table')

        logger.debug('Re-creating notifications lists table...')
        partition_utils.repartition_notifications_lists(conn,
                                                        num_physical_shards=num_phys_imei_shards,
                                                        src_filter_sql=str(src_filter_sql,
                                                                           encoding=conn.encoding))
        logger.debug('Re-created notifications lists table')

        logger.debug('Re-creating exceptions lists table...')
        partition_utils.repartition_exceptions_lists(conn,
                                                     num_physical_shards=num_phys_imei_shards,
                                                     src_filter_sql=str(src_filter_sql,
                                                                        encoding=conn.encoding))
        logger.debug('Re-created exceptions lists table')

        logger.debug('Calculating new number of rows in lists tables...')
        cursor.execute(row_count_sql)
        rows_after = cursor.fetchone()
        blacklist_rows_after = int(rows_after.blacklist_row_count.strip('()'))
        notflist_rows_after = int(rows_after.noft_lists_row_count.strip('()'))
        excplist_rows_after = int(rows_after.excp_lists_row_count.strip('()'))
        rows_after = blacklist_rows_after + notflist_rows_after + excplist_rows_after
        logger.debug('Calculated new number of rows in lists tables')
        statsd.gauge('{0}blacklist_rows_after'.format(metrics_run_root), blacklist_rows_after)
        statsd.gauge('{0}notifications_lists_rows_after'.format(metrics_run_root),
                     notflist_rows_after)
        statsd.gauge('{0}exceptions_lists_rows_after'.format(metrics_run_root),
                     excplist_rows_after)
        metadata.add_optional_job_metadata(metadata_conn, command, run_id,
                                           blacklist_rows_after=blacklist_rows_after,
                                           notifications_lists_rows_after=notflist_rows_after,
                                           exceptions_lists_rows_after=excplist_rows_after)

        # Pruning removes rows, so the pruned count is before minus after
        logger.info('Pruned {0:d} rows from lists tables'.format(rows_before - rows_after))
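# Worked example (illustrative, not DIRBS Core code): the retention cut-off computed by
# lists() above. The cut-off is the first day of the month retention_months before the
# current month, so with curr_date = 2024-08-21 and six months of retention, any
# dirbs-listgen run whose end_time is before 2024-02-01 gets pruned.
def _retention_cutoff(curr_date, retention_months):
    """Hypothetical helper mirroring the first_month_to_drop computation in lists()."""
    return (datetime.date(curr_date.year, curr_date.month, 1) -
            relativedelta.relativedelta(months=retention_months))


# _retention_cutoff(datetime.date(2024, 8, 21), 6) == datetime.date(2024, 2, 1)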