Ejemplo n.º 1
0
    def _calc_imeis_job(self, app_config, run_id, curr_date,
                        virt_imei_range_start, virt_imei_range_end):
        """Function to calculate the IMEIs that are met by this condition (single job)."""
        with create_db_connection(app_config.db_config) as conn, conn.cursor(
        ) as cursor, CodeProfiler() as cp:
            dims_sql = [
                d.sql(conn,
                      app_config,
                      virt_imei_range_start,
                      virt_imei_range_end,
                      curr_date=curr_date) for d in self.dimensions
            ]

            # Calculate the SQL for the intersection of all dimensions
            condition_sql = sql.SQL(' INTERSECT ').join(dims_sql)

            # If sticky, we need to UNION the sql with the currently selected IMEIs
            if self.config.sticky:
                condition_sql = sql.SQL("""SELECT imei_norm
                                             FROM classification_state
                                            WHERE cond_name = {cond_name}
                                              AND virt_imei_shard >= {virt_imei_range_start}
                                              AND virt_imei_shard < {virt_imei_range_end}
                                              AND end_date IS NULL
                                                  UNION ALL {cond_results_sql}
                                        """).format(
                    cond_name=sql.Literal(self.label),
                    virt_imei_range_start=sql.Literal(virt_imei_range_start),
                    virt_imei_range_end=sql.Literal(virt_imei_range_end),
                    cond_results_sql=condition_sql)

            # Make sure we only get distinct IMEIs
            condition_sql = sql.SQL(
                """SELECT imei_norm, calc_virt_imei_shard(imei_norm) AS virt_imei_shard
                                         FROM ({0}) non_distinct
                                     GROUP BY imei_norm""").format(
                    condition_sql)

            # Copy results to the temp table
            tbl_name = partition_utils.imei_shard_name(
                base_name=self.intermediate_tbl_name(run_id),
                virt_imei_range_start=virt_imei_range_start,
                virt_imei_range_end=virt_imei_range_end)
            cursor.execute(
                sql.SQL(
                    """INSERT INTO {intermediate_tbl}(imei_norm, virt_imei_shard) {condition_sql}"""
                ).format(intermediate_tbl=sql.Identifier(tbl_name),
                         condition_sql=condition_sql))

            matching_imeis_count = cursor.rowcount

        return matching_imeis_count, cp.duration
Ejemplo n.º 2
0
 def _time_component_perf(self, key, fn, *fnargs, **fnkwargs):
     """Helper function to time the performance of a function."""
     component_key = '{0}import_time.components.{1}'.format(
         self._metrics_import_root, key)
     perf_metadata_path = '{{performance_timing,{0}_start}}'.format(key)
     metadata.add_time_metadata(self._metadata_conn, 'dirbs-import',
                                self.import_id, perf_metadata_path)
     with CodeProfiler() as cp:
         rv = fn(*fnargs, **fnkwargs)
     self._statsd.gauge(component_key, cp.duration)
     perf_metadata_path = '{{performance_timing, {0}_end}}'.format(key)
     metadata.add_time_metadata(self._metadata_conn, 'dirbs-import',
                                self.import_id, perf_metadata_path)
     return rv
Ejemplo n.º 3
0
    def _update_classification_state_job(self, app_config, run_id, curr_date,
                                         virt_imei_range_start,
                                         virt_imei_range_end):
        """Function to update the classificate_state table with IMEIs that are met by this condition (single job)."""
        with create_db_connection(app_config.db_config) as conn, conn.cursor(
        ) as cursor, CodeProfiler() as cp:
            src_shard_name = partition_utils.imei_shard_name(
                base_name=self.intermediate_tbl_name(run_id),
                virt_imei_range_start=virt_imei_range_start,
                virt_imei_range_end=virt_imei_range_end)

            # Add index on imei_norm
            indices = [
                partition_utils.IndexMetadatum(idx_cols=['imei_norm'],
                                               is_unique=True)
            ]
            partition_utils.add_indices(conn,
                                        tbl_name=src_shard_name,
                                        idx_metadata=indices)

            # Analyze table for better stats/plans
            cursor.execute(
                sql.SQL('ANALYZE {0}').format(sql.Identifier(src_shard_name)))

            # Calculate block date
            if curr_date is None:
                curr_date = datetime.date.today()

            in_amnesty_eval_period, in_amnesty_period = compute_amnesty_flags(
                app_config, curr_date)

            # If condition is blocking and is not eligible for amnesty, then compute block_date.
            # The block_date is set to NULL for amnesty_eligible condition within the eval period.
            amnesty_eligible = self.config.amnesty_eligible
            sticky_block_date = curr_date + datetime.timedelta(days=self.config.grace_period) \
                if self.config.blocking and not (amnesty_eligible and in_amnesty_eval_period) else None

            # If the condition's amnesty_eligible flag changed while in eval period, then make sure we update
            # the amnesty_granted column in the classification_state table for existing IMEIs meeting that condition.
            # These rows will be selected by the existing WHERE clause filters as the block_date would change
            # from being NULL to not-NULL or vice-versa.
            set_amnesty_granted_column = sql.SQL(', amnesty_granted = {0}').format(sql.Literal(amnesty_eligible)) \
                if in_amnesty_eval_period else sql.SQL('')

            # If in amnesty period, update the block_date for IMEIs that were previously classified
            # as amnesty eligible. This filter is to select amnesty_granted IMEIs with not-NULL block date.
            # This is to make sure if the amnesty_end_date was updated, we update the block_date too.
            amnesty_block_date_filter = sql.SQL(
                'OR cs.amnesty_granted = TRUE'
            ) if in_amnesty_period else sql.SQL('')

            dest_shard_name = partition_utils.imei_shard_name(
                base_name='classification_state',
                virt_imei_range_start=virt_imei_range_start,
                virt_imei_range_end=virt_imei_range_end)

            # If a condition is blocking, insert new records into state table with not null blocking date or
            # set a not null blocking date for the existing ones having a null block_date.
            # Viceversa, if a condition is not blocking, insert new records into state table with Null block_date
            # or set a Null block_date for the existing ones having a not-null block_date.
            # Set the amnesty_granted column equal to condition's amnesty_eligible flag when in amnesty eval
            # period, otherwise always set it to False for new IMEIs meeting the condition.
            cursor.execute(
                sql.SQL("""INSERT INTO {dest_shard} AS cs(imei_norm,
                                                                     cond_name,
                                                                     run_id,
                                                                     start_date,
                                                                     end_date,
                                                                     block_date,
                                                                     amnesty_granted,
                                                                     virt_imei_shard)
                                           SELECT imei_norm,
                                                  %s,
                                                  %s,
                                                  %s,
                                                  NULL,
                                                  %s,
                                                  %s,
                                                  calc_virt_imei_shard(imei_norm)
                                             FROM {src_shard}
                                                  ON CONFLICT (imei_norm, cond_name)
                                            WHERE end_date IS NULL
                                                  DO UPDATE
                                                        SET block_date = CASE WHEN cs.amnesty_granted = TRUE
                                                                          AND NOT {in_eval_period}
                                                                         THEN {amnesty_end_date}
                                                                         ELSE {sticky_block_date}
                                                                          END
                                                            {set_amnesty_granted_column}
                                                      WHERE (cs.block_date IS NULL
                                                        AND excluded.block_date IS NOT NULL)
                                                         OR (cs.block_date IS NOT NULL
                                                        AND excluded.block_date IS NULL)
                                                            {amnesty_block_date_filter}"""
                        )  # noqa Q441
                .format(src_shard=sql.Identifier(src_shard_name),
                        dest_shard=sql.Identifier(dest_shard_name),
                        in_eval_period=sql.Literal(in_amnesty_eval_period),
                        set_amnesty_granted_column=set_amnesty_granted_column,
                        amnesty_block_date_filter=amnesty_block_date_filter,
                        amnesty_end_date=sql.Literal(
                            app_config.amnesty_config.amnesty_period_end_date),
                        sticky_block_date=sql.Literal(sticky_block_date)),
                [
                    self.label, run_id, curr_date, sticky_block_date,
                    (amnesty_eligible and in_amnesty_eval_period)
                ])

            # Get rid of records that no longer exist in the matched IMEIs list
            cursor.execute(
                sql.SQL("""UPDATE {dest_shard} dst
                                         SET end_date = %s
                                       WHERE cond_name = %s
                                         AND end_date IS NULL
                                         AND NOT EXISTS (SELECT imei_norm
                                                           FROM {src_shard}
                                                          WHERE imei_norm = dst.imei_norm)"""
                        ).format(src_shard=sql.Identifier(src_shard_name),
                                 dest_shard=sql.Identifier(dest_shard_name)),
                [curr_date, self.label])

        return cp.duration