    def queue_calc_imeis_jobs(self, executor, app_config, run_id, curr_date):
        """
        Method to queue jobs to calculate the IMEIs that meet this condition.

        Arguments:
            executor: instance of the Python executor class, used to submit jobs and collect their results
            app_config: current DIRBS app configuration, used to extract the settings the job requires
            run_id: run id of the current classification job
            curr_date: current date of the system
        """
        with create_db_connection(app_config.db_config) as conn, conn.cursor() as cursor:
            cursor.execute(sql.SQL("""CREATE UNLOGGED TABLE {intermediate_tbl} (
                                          imei_norm TEXT NOT NULL,
                                          virt_imei_shard SMALLINT NOT NULL
                                      )
                                      PARTITION BY RANGE (virt_imei_shard)""")
                           .format(intermediate_tbl=self.intermediate_tbl_id(run_id)))
            partition_utils.create_imei_shard_partitions(conn, tbl_name=self.intermediate_tbl_name(run_id),
                                                         unlogged=True)
            parallel_shards = partition_utils.num_physical_imei_shards(conn)

        # Done with connection -- temp tables should now be committed
        virt_imei_shard_ranges = partition_utils.virt_imei_shard_bounds(parallel_shards)
        for virt_imei_range_start, virt_imei_range_end in virt_imei_shard_ranges:
            yield executor.submit(self._calc_imeis_job,
                                  app_config,
                                  run_id,
                                  curr_date,
                                  virt_imei_range_start,
                                  virt_imei_range_end)
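
Each queue_* method above is a generator of futures rather than a blocking call, so the caller is expected to drain it and then wait on the results. A minimal consumption sketch, assuming a standard concurrent.futures executor and a condition object exposing the method (both assumptions; the executor wrapper DIRBS actually uses may differ):

import concurrent.futures

def run_calc_imeis(condition, app_config, run_id, curr_date):
    # Hypothetical driver: submit one job per virtual IMEI shard range and
    # block until every shard has finished, re-raising any job exception.
    with concurrent.futures.ProcessPoolExecutor() as executor:
        futures = list(condition.queue_calc_imeis_jobs(executor, app_config,
                                                       run_id, curr_date))
        for future in concurrent.futures.as_completed(futures):
            future.result()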
Example #2
    def queue_update_classification_state_jobs(self, executor, app_config,
                                               run_id, curr_date):
        """Method to queue jobs to update the classification_state table after the IMEIs have been calculated."""
        with create_db_connection(app_config.db_config) as conn:
            parallel_shards = partition_utils.num_physical_imei_shards(conn)
            virt_imei_shard_ranges = partition_utils.virt_imei_shard_bounds(parallel_shards)
            for virt_imei_range_start, virt_imei_range_end in virt_imei_shard_ranges:
                yield executor.submit(self._update_classification_state_job,
                                      app_config, run_id, curr_date,
                                      virt_imei_range_start,
                                      virt_imei_range_end)

    def upgrade(self, db_conn):
        """Overrides AbstractMigrator upgrade method."""
        with db_conn.cursor() as cursor:
            logger = logging.getLogger('dirbs.db')
            logger.info('Re-partitioning stolen_list table...')
            cursor.execute('ALTER TABLE historic_stolen_list ADD COLUMN virt_imei_shard SMALLINT')
            cursor.execute('UPDATE historic_stolen_list SET virt_imei_shard = calc_virt_imei_shard(imei_norm)')
            cursor.execute('ALTER TABLE historic_stolen_list ALTER COLUMN virt_imei_shard SET NOT NULL')
            num_shards = partition_utils.num_physical_imei_shards(db_conn)
            partition_utils.repartition_stolen_list(db_conn, num_physical_shards=num_shards)
            logger.info('Re-partitioned stolen_list table')

            # Now that we can create tables during classification, we need to allow dirbs_core_classify to
            # create tables
            cursor.execute('GRANT CREATE ON SCHEMA core TO dirbs_core_classify')
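
The migration above backfills virt_imei_shard using the database-side calc_virt_imei_shard function, whose definition is not shown here. A minimal Python sketch of the idea, assuming 100 virtual shards and a stable hash of the normalized IMEI (both assumptions, not the confirmed DIRBS formula):

import hashlib

NUM_VIRT_IMEI_SHARDS = 100  # assumed virtual shard count

def calc_virt_imei_shard(imei_norm):
    # Hypothetical analogue of the SQL calc_virt_imei_shard function: map a
    # normalized IMEI to a SMALLINT shard in [0, NUM_VIRT_IMEI_SHARDS).
    digest = hashlib.sha256(imei_norm.encode('utf-8')).digest()
    return int.from_bytes(digest[:2], 'big') % NUM_VIRT_IMEI_SHARDS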
    def queue_update_classification_state_jobs(self, executor, app_config, run_id, curr_date):
        """
        Method to queue jobs to update the classification_state table after the IMEIs have been calculated.

        Arguments:
            executor: job executor instance used to submit jobs and collect their results
            app_config: current DIRBS app config object to read configuration from
            run_id: run_id of the current running classification job
            curr_date: current date of the system
        """
        with create_db_connection(app_config.db_config) as conn:
            parallel_shards = partition_utils.num_physical_imei_shards(conn)
            virt_imei_shard_ranges = partition_utils.virt_imei_shard_bounds(parallel_shards)
            for virt_imei_range_start, virt_imei_range_end in virt_imei_shard_ranges:
                yield executor.submit(self._update_classification_state_job,
                                      app_config,
                                      run_id,
                                      curr_date,
                                      virt_imei_range_start,
                                      virt_imei_range_end)
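
partition_utils.virt_imei_shard_bounds splits the virtual shard space into one contiguous range per physical shard, so each submitted job covers a disjoint slice. A sketch of that splitting under the assumption of 100 virtual shards distributed as evenly as possible (an assumption about the real helper's behavior):

def virt_imei_shard_bounds(num_physical_shards, num_virt_shards=100):
    # Hypothetical version of partition_utils.virt_imei_shard_bounds:
    # yield (start, end) virtual-shard ranges, end exclusive.
    base, extra = divmod(num_virt_shards, num_physical_shards)
    start = 0
    for i in range(num_physical_shards):
        end = start + base + (1 if i < extra else 0)
        yield start, end
        start = end

# e.g. list(virt_imei_shard_bounds(4)) == [(0, 25), (25, 50), (50, 75), (75, 100)]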
Example #5
    def queue_calc_imeis_jobs(self, executor, app_config, run_id, curr_date):
        """Method to queue jobs to calculate the IMEIs that are met by this condition."""
        with create_db_connection(
                app_config.db_config) as conn, conn.cursor() as cursor:
            cursor.execute(
                sql.SQL("""CREATE UNLOGGED TABLE {intermediate_tbl} (
                                          imei_norm TEXT NOT NULL,
                                          virt_imei_shard SMALLINT NOT NULL
                                      )
                                      PARTITION BY RANGE (virt_imei_shard)""").
                format(intermediate_tbl=self.intermediate_tbl_id(run_id)))
            partition_utils.create_imei_shard_partitions(
                conn,
                tbl_name=self.intermediate_tbl_name(run_id),
                unlogged=True)
            parallel_shards = partition_utils.num_physical_imei_shards(conn)

        # Done with connection -- temp tables should now be committed
        virt_imei_shard_ranges = partition_utils.virt_imei_shard_bounds(
            parallel_shards)
        for virt_imei_range_start, virt_imei_range_end in virt_imei_shard_ranges:
            yield executor.submit(self._calc_imeis_job, app_config, run_id,
                                  curr_date, virt_imei_range_start,
                                  virt_imei_range_end)

    def upgrade(self, conn):
        """Overrides AbstractMigrator upgrade method."""
        logger = logging.getLogger('dirbs.db')
        logger.info('Creating historic_whitelist table...')
        with conn.cursor() as cur:
            # create historic table for whitelist
            cur.execute(
                sql.SQL("""CREATE TABLE historic_whitelist (
                                       imei_norm text NOT NULL,
                                       associated BOOLEAN DEFAULT FALSE,
                                       eir_id text DEFAULT NULL,
                                       start_date TIMESTAMP NOT NULL,
                                       end_date TIMESTAMP DEFAULT NULL,
                                       virt_imei_shard SMALLINT NOT NULL
                                )
                                PARTITION BY RANGE (virt_imei_shard)"""))

            num_shards = part_utils.num_physical_imei_shards(conn)
            logger.debug('Creating Whitelist child partitions...')
            part_utils.create_imei_shard_partitions(
                conn,
                tbl_name='historic_whitelist',
                num_physical_shards=num_shards,
                fillfactor=80)

            # Add indices to each partition
            idx_metadata = [
                part_utils.IndexMetadatum(idx_cols=['imei_norm'],
                                          is_unique=True,
                                          partial_sql='WHERE end_date IS NULL')
            ]
            part_utils.add_indices(conn,
                                   tbl_name='historic_whitelist',
                                   idx_metadata=idx_metadata)

            # Create view over historic_whitelist
            cur.execute("""CREATE VIEW whitelist AS
                                SELECT imei_norm, associated, eir_id, virt_imei_shard
                                  FROM historic_whitelist
                                 WHERE end_date IS NULL WITH CHECK OPTION""")

            # create view for imeis that are not associated yet
            cur.execute("""CREATE VIEW available_whitelist AS
                                SELECT imei_norm, virt_imei_shard
                                  FROM historic_whitelist
                                 WHERE associated IS FALSE
                                   AND end_date IS NULL WITH CHECK OPTION""")

            # Create insert and update triggers on historic_registration_list to keep the
            # whitelist in sync
            cur.execute(
                """CREATE OR REPLACE FUNCTION insert_whitelist() RETURNS TRIGGER AS
                           $BODY$
                           BEGIN
                               IF new.status = 'whitelist' OR new.status IS NULL THEN
                                INSERT INTO
                                    historic_whitelist (imei_norm, start_date, end_date, virt_imei_shard)
                                    VALUES (new.imei_norm, new.start_date, new.end_date, new.virt_imei_shard);
                               END IF;

                                      RETURN new;
                           END;
                           $BODY$
                           LANGUAGE plpgsql;

                           -- update function
                           CREATE OR REPLACE FUNCTION update_whitelist() RETURNS TRIGGER AS
                           $BODY$
                           BEGIN
                               UPDATE historic_whitelist
                                 SET end_date = new.end_date
                                WHERE imei_norm = new.imei_norm
                                  AND new.end_date IS NOT NULL;

                                  RETURN new;
                           END;
                           $BODY$
                           LANGUAGE plpgsql;

                           -- triggers
                           CREATE TRIGGER wl_insert_trigger AFTER INSERT ON historic_registration_list
                                                                        FOR EACH ROW
                                                                  EXECUTE PROCEDURE insert_whitelist();

                           CREATE TRIGGER wl_update_trigger AFTER UPDATE ON historic_registration_list
                                                                        FOR EACH ROW
                                                                  EXECUTE PROCEDURE update_whitelist();

                           ALTER TYPE job_command_type RENAME TO job_command_type_old;

                           --
                           -- Create type for command
                           --
                           CREATE TYPE job_command_type AS ENUM (
                               'dirbs-catalog',
                               'dirbs-classify',
                               'dirbs-db',
                               'dirbs-import',
                               'dirbs-listgen',
                               'dirbs-prune',
                               'dirbs-report',
                               'dirbs-whitelist'
                           );

                           ALTER TABLE job_metadata ALTER COLUMN command TYPE job_command_type
                              USING command::TEXT::job_command_type;

                           DROP TYPE job_command_type_old;

                           --
                           -- Whitelist notification triggers
                           --
                           CREATE FUNCTION notify_insert_distributor() RETURNS TRIGGER AS
                           $BODY$
                           BEGIN
                               IF new.associated IS FALSE AND new.eir_id IS NULL THEN
                                PERFORM pg_notify('distributor_updates', row_to_json(NEW)::text);
                               END IF;
                               RETURN new;
                           END;
                           $BODY$
                           LANGUAGE plpgsql VOLATILE COST 100;

                           CREATE FUNCTION notify_remove_distributor() RETURNS TRIGGER AS
                           $BODY$
                           BEGIN
                                IF new.end_date IS NOT NULL THEN
                                 PERFORM pg_notify('distributor_updates', row_to_json(NEW)::text);
                                END IF;
                                RETURN new;
                           END;
                           $BODY$
                           LANGUAGE plpgsql VOLATILE COST 100;

                           CREATE TRIGGER notify_insert_trigger AFTER INSERT ON historic_whitelist
                                                                             FOR EACH ROW
                                                                       EXECUTE PROCEDURE notify_insert_distributor();

                           CREATE TRIGGER notify_remove_trigger AFTER UPDATE ON historic_whitelist
                                                                             FOR EACH ROW
                                                                       EXECUTE PROCEDURE notify_remove_distributor();

                           GRANT SELECT ON historic_whitelist TO dirbs_core_import_registration_list;
                           GRANT UPDATE ON historic_whitelist TO dirbs_core_import_registration_list;
                           GRANT INSERT ON historic_whitelist TO dirbs_core_import_registration_list;
                           GRANT INSERT ON historic_whitelist TO dirbs_core_white_list;
                           GRANT UPDATE ON historic_whitelist TO dirbs_core_white_list;
                           GRANT SELECT ON historic_whitelist TO dirbs_core_white_list;
                           GRANT DELETE ON historic_whitelist TO dirbs_core_white_list;
                        """)  # noqa: Q440, Q449, Q441, Q447
Example #7
def lists(ctx, config, statsd, logger, run_id, conn, metadata_conn, command,
          metrics_root, metrics_run_root):
    """Prune obsolete lists data."""
    curr_date = ctx.obj['CURR_DATE']

    # store metadata
    metadata.add_optional_job_metadata(
        metadata_conn,
        command,
        run_id,
        retention_months=config.retention_config.months_retention)

    logger.info(
        'Pruning lists tables to remove any obsolete data with end_time outside the retention window...'
    )
    retention_months = config.retention_config.months_retention

    if curr_date is None:
        curr_date = datetime.date.today()

    first_month_to_drop = datetime.date(
        curr_date.year, curr_date.month,
        1) - relativedelta.relativedelta(months=retention_months)
    logger.info(
        'Lists data with end_time earlier than {0} will be pruned'.format(
            first_month_to_drop))

    with utils.db_role_setter(
            conn, role_name='dirbs_core_power_user'), conn.cursor() as cursor:
        logger.debug('Calculating original number of rows in lists tables...')
        row_count_sql = sql.SQL(
            """SELECT blacklist_row_count, noft_lists_row_count, excp_lists_row_count
                                     FROM (SELECT COUNT(*)
                                             FROM blacklist) AS blacklist_row_count,
                                          (SELECT COUNT(*)
                                             FROM notifications_lists) AS noft_lists_row_count,
                                          (SELECT COUNT(*)
                                             FROM exceptions_lists) AS excp_lists_row_count"""
        )
        cursor.execute(row_count_sql)
        rows_before = cursor.fetchone()
        blacklist_rows_before = int(
            rows_before.blacklist_row_count.strip('()'))
        notflist_rows_before = int(
            rows_before.noft_lists_row_count.strip('()'))
        excplist_rows_before = int(
            rows_before.excp_lists_row_count.strip('()'))
        rows_before = blacklist_rows_before + notflist_rows_before + excplist_rows_before
        logger.debug('Calculated original number of rows in lists tables...')
        statsd.gauge('{0}blacklist_rows_before'.format(metrics_run_root),
                     blacklist_rows_before)
        statsd.gauge(
            '{0}notifications_lists_rows_before'.format(metrics_run_root),
            notflist_rows_before)
        statsd.gauge(
            '{0}exceptions_lists_rows_before'.format(metrics_run_root),
            excplist_rows_before)
        metadata.add_optional_job_metadata(
            metadata_conn,
            command,
            run_id,
            blacklist_rows_before=blacklist_rows_before,
            notifications_lists_rows_before=notflist_rows_before,
            exceptions_lists_rows_before=excplist_rows_before)

        # Calculate number of rows in the lists table outside the retention window
        job_metadata_filter_sql = """SELECT run_id
                                       FROM job_metadata
                                      WHERE command = 'dirbs-listgen'
                                        AND end_time < '{0}'""".format(
            first_month_to_drop)

        cursor.execute(
            sql.SQL("""SELECT COUNT(*)
                                    FROM blacklist
                                   WHERE start_run_id IN ({0})""".format(
                job_metadata_filter_sql)))
        total_bl_rows_out_window_to_prune = cursor.fetchone()[0]
        logger.info(
            'Found {0:d} rows of blacklist table outside the retention window to prune'
            .format(total_bl_rows_out_window_to_prune))

        cursor.execute(
            sql.SQL("""SELECT COUNT(*)
                                    FROM notifications_lists
                                   WHERE start_run_id IN ({0})""".format(
                job_metadata_filter_sql)))
        total_nl_rows_out_window_to_prune = cursor.fetchone()[0]
        logger.info(
            'Found {0:d} rows of notifications lists table outside the retention window to prune'
            .format(total_nl_rows_out_window_to_prune))

        cursor.execute(
            sql.SQL("""SELECT COUNT(*)
                                    FROM exceptions_lists
                                   WHERE start_run_id IN ({0})""".format(
                job_metadata_filter_sql)))
        total_el_rows_out_window_to_prune = cursor.fetchone()[0]
        logger.info(
            'Found {0:d} rows of exceptions lists table outside the retention window to prune'
            .format(total_el_rows_out_window_to_prune))

        # We repartition the tables to re-create them, passing a condition sql
        logger.debug('Re-creating blacklist table...')
        num_phys_imei_shards = partition_utils.num_physical_imei_shards(conn)
        src_filter_sql = cursor.mogrify(
            """WHERE start_run_id NOT IN ({0})""".format(
                job_metadata_filter_sql))
        partition_utils.repartition_blacklist(
            conn,
            num_physical_shards=num_phys_imei_shards,
            src_filter_sql=str(src_filter_sql, encoding=conn.encoding))
        logger.debug('Re-created blacklist table')

        logger.debug('Re-creating notifications lists table...')
        partition_utils.repartition_notifications_lists(
            conn,
            num_physical_shards=num_phys_imei_shards,
            src_filter_sql=str(src_filter_sql, encoding=conn.encoding))
        logger.debug('Re-created notifications lists table')

        logger.debug('Re-creating exceptions lists table...')
        partition_utils.repartition_exceptions_lists(
            conn,
            num_physical_shards=num_phys_imei_shards,
            src_filter_sql=str(src_filter_sql, encoding=conn.encoding))
        logger.debug('Re-created exceptions lists table')

        logger.debug('Calculating new number of rows in lists tables...')
        cursor.execute(row_count_sql)
        rows_after = cursor.fetchone()
        blacklist_rows_after = int(rows_after.blacklist_row_count.strip('()'))
        notflist_rows_after = int(rows_after.noft_lists_row_count.strip('()'))
        excplist_rows_after = int(rows_after.excp_lists_row_count.strip('()'))
        rows_after = blacklist_rows_after + notflist_rows_after + excplist_rows_after
        logger.debug('Calculated new number of rows in lists tables')
        statsd.gauge('{0}blacklist_rows_after'.format(metrics_run_root),
                     blacklist_rows_after)
        statsd.gauge(
            '{0}notifications_lists_rows_after'.format(metrics_run_root),
            notflist_rows_after)
        statsd.gauge('{0}exceptions_lists_rows_after'.format(metrics_run_root),
                     excplist_rows_after)
        metadata.add_optional_job_metadata(
            metadata_conn,
            command,
            run_id,
            blacklist_rows_after=blacklist_rows_after,
            notifications_lists_rows_after=notflist_rows_after,
            exceptions_lists_rows_after=excplist_rows_after)
        logger.info('Pruned {0:d} rows from lists tables'.format(rows_before -
                                                                 rows_after))
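
The pruning cutoff used above truncates the current date to the first of its month and steps back retention_months months, so whole months fall in or out of the window. A small worked example of that computation, with the date and retention chosen purely for illustration:

import datetime
from dateutil import relativedelta

curr_date = datetime.date(2024, 8, 17)   # hypothetical system date
retention_months = 6                     # hypothetical config value

first_month_to_drop = datetime.date(curr_date.year, curr_date.month, 1) \
    - relativedelta.relativedelta(months=retention_months)
assert first_month_to_drop == datetime.date(2024, 2, 1)
# Rows with end_time earlier than 2024-02-01 fall outside the retention window.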
Example #8
def classification_state(ctx, config, statsd, logger, run_id, conn,
                         metadata_conn, command, metrics_root,
                         metrics_run_root):
    """Prune obsolete classification_state data."""
    curr_date = ctx.obj['CURR_DATE']

    # Store metadata
    metadata.add_optional_job_metadata(
        metadata_conn,
        command,
        run_id,
        curr_date=curr_date.isoformat() if curr_date is not None else None,
        retention_months=config.retention_config.months_retention)

    logger.info(
        'Pruning classification_state table to remove any classification state data related to '
        'obsolete conditions and data with end_date outside the retention window...'
    )

    cond_config_list = [c.label for c in config.conditions]
    retention_months = config.retention_config.months_retention

    if curr_date is None:
        curr_date = datetime.date.today()

    first_month_to_drop = datetime.date(
        curr_date.year, curr_date.month,
        1) - relativedelta.relativedelta(months=retention_months)
    logger.info(
        'Classification state data with end_date earlier than {0} will be '
        'pruned'.format(first_month_to_drop))

    with utils.db_role_setter(
            conn, role_name='dirbs_core_power_user'), conn.cursor() as cursor:
        logger.debug(
            'Calculating original number of rows in classification_state table...'
        )
        cursor.execute('SELECT COUNT(*) FROM classification_state')
        rows_before = cursor.fetchone()[0]
        logger.debug(
            'Calculated original number of rows in classification_state table')
        statsd.gauge('{0}rows_before'.format(metrics_run_root), rows_before)
        metadata.add_optional_job_metadata(metadata_conn,
                                           command,
                                           run_id,
                                           rows_before=rows_before)

        # Calculate number of rows in the classification table outside retention window
        cursor.execute(
            sql.SQL("""SELECT COUNT(*)
                                    FROM classification_state
                                   WHERE end_date < %s """),
            [first_month_to_drop])
        total_rows_out_window_to_prune = cursor.fetchone()[0]
        logger.info(
            'Found {0:d} rows of classification_state table '
            'with end_date outside the retention window to prune.'.format(
                total_rows_out_window_to_prune))

        # Calculate number of rows in the classification with conditions no longer existing
        cursor.execute(
            sql.SQL("""SELECT COUNT(*)
                                    FROM classification_state
                                   WHERE NOT starts_with_prefix(cond_name, %s)"""
                    ), [cond_config_list])
        total_rows_no_cond_to_prune = cursor.fetchone()[0]
        logger.info(
            'Found {0:d} rows of classification_state table with conditions '
            'no longer existing to prune.'.format(total_rows_no_cond_to_prune))

        logger.debug('Re-creating classification_state table...')
        # Basically, we just re-partition the classification_state table to re-create it, passing a src_filter_sql
        # parameter
        num_phys_imei_shards = partition_utils.num_physical_imei_shards(conn)
        src_filter_sql = cursor.mogrify(
            """WHERE (end_date > %s
                                              OR end_date IS NULL)
                                             AND cond_name LIKE ANY(%s)""",
            [first_month_to_drop, cond_config_list])
        partition_utils.repartition_classification_state(
            conn,
            num_physical_shards=num_phys_imei_shards,
            src_filter_sql=str(src_filter_sql, encoding=conn.encoding))
        logger.debug('Re-created classification_state table')

        logger.debug(
            'Calculating new number of rows in classification_state table...')
        cursor.execute('SELECT COUNT(*) FROM classification_state')
        rows_after = cursor.fetchone()[0]
        logger.debug(
            'Calculated new number of rows in classification_state table')
        statsd.gauge('{0}rows_after'.format(metrics_run_root), rows_after)
        metadata.add_optional_job_metadata(metadata_conn,
                                           command,
                                           run_id,
                                           rows_after=rows_after)

        logger.info('Pruned {0:d} rows from classification_state table'.format(
            rows_before - rows_after))
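
cursor.mogrify renders the parameterized filter into a concrete SQL fragment (as bytes) before it is decoded and handed to the repartition helper. A minimal illustration, assuming psycopg2 and example values for the cutoff date and condition labels (all hypothetical):

import datetime
import psycopg2

conn = psycopg2.connect('dbname=dirbs')  # hypothetical DSN
with conn.cursor() as cursor:
    src_filter_sql = cursor.mogrify(
        """WHERE (end_date > %s OR end_date IS NULL)
              AND cond_name LIKE ANY(%s)""",
        [datetime.date(2024, 2, 1), ['gsma_not_found', 'local_stolen']])
    # mogrify returns bytes with the parameters quoted inline, ready to be
    # decoded with the connection's encoding and spliced into other SQL
    print(str(src_filter_sql, encoding=conn.encoding))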
Example #9
    def _migrate_device_association_list(self, logger, conn):
        """Method to migrate barred imeis list."""
        with conn.cursor() as cursor:
            cursor.execute(
                sql.SQL("""CREATE TABLE historic_device_association_list (
                                          imei_norm text NOT NULL,
                                          uid text NOT NULL,
                                          start_date TIMESTAMP NOT NULL,
                                          end_date TIMESTAMP,
                                          virt_imei_shard SMALLINT NOT NULL
                                    )
                                    PARTITION BY RANGE (virt_imei_shard);"""))

            num_shards = part_utils.num_physical_imei_shards(conn)
            logger.debug('Granting permissions to device_association_list partitions...')
            part_utils._grant_perms_barred_list(
                conn, part_name='historic_device_association_list')
            logger.debug('Creating device_association_list child partitions...')
            part_utils.create_imei_shard_partitions(
                conn,
                tbl_name='historic_device_association_list',
                num_physical_shards=num_shards,
                perms_func=part_utils._grant_perms_association_list,
                fillfactor=80)

            # Add indexes to each partition
            idx_metadata = [
                part_utils.IndexMetadatum(idx_cols=['uid', 'imei_norm'],
                                          is_unique=True,
                                          partial_sql='WHERE end_date IS NULL')
            ]
            part_utils.add_indices(conn,
                                   tbl_name='historic_device_association_list',
                                   idx_metadata=idx_metadata)

            # Create view over historic_device_association_list
            cursor.execute("""CREATE OR REPLACE VIEW device_association_list AS
                                   SELECT uid, imei_norm, virt_imei_shard
                                     FROM historic_device_association_list
                                    WHERE end_date IS NULL WITH CHECK OPTION"""
                           )  # noqa: Q440
            cursor.execute("""GRANT SELECT ON device_association_list
                                      TO dirbs_core_classify, dirbs_core_api,
                                      dirbs_core_import_device_association_list"""
                           )

            # Creating insert trigger function
            cursor.execute(
                """CREATE FUNCTION device_association_list_staging_data_insert_trigger_fn() RETURNS TRIGGER
                                  LANGUAGE plpgsql
                                  AS $$
                              BEGIN
                                  NEW.uid = NULLIF(TRIM(NEW.uid), '');
                                  NEW.imei_norm = normalize_imei(NULLIF(TRIM(NEW.imei), ''));
                                  RETURN NEW;
                              END
                              $$;

                              ALTER FUNCTION device_association_list_staging_data_insert_trigger_fn()
                                OWNER TO dirbs_core_power_user;
                            """)
            logger.debug(
                'Granting create permission to dirbs_core_import_device_association_list...'
            )
            cursor.execute(
                'GRANT CREATE ON SCHEMA core TO dirbs_core_import_device_association_list'
            )
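
The staging trigger above trims incoming fields, converts empty strings to NULL, and normalizes the IMEI via the database's normalize_imei helper. A rough Python mirror of that per-row normalization, assuming a simplified normalize rule (uppercase; keep the first 14 characters when they are all digits) that only approximates the real SQL function:

def normalize_imei(imei):
    # Assumed approximation of the SQL normalize_imei helper, not its
    # confirmed definition.
    if imei is None:
        return None
    imei = imei.upper()
    return imei[:14] if imei[:14].isdigit() else imei

def staging_row_insert(row):
    # Hypothetical Python mirror of the staging-data insert trigger:
    # NULLIF(TRIM(x), '') becomes "strip, then empty string -> None".
    uid = (row.get('uid') or '').strip() or None
    imei = (row.get('imei') or '').strip() or None
    return {'uid': uid, 'imei_norm': normalize_imei(imei)}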