def _calc_imeis_job(self, app_config, run_id, curr_date, virt_imei_range_start, virt_imei_range_end): """Function to calculate the IMEIs that are met by this condition (single job).""" with create_db_connection(app_config.db_config) as conn, conn.cursor( ) as cursor, CodeProfiler() as cp: dims_sql = [ d.sql(conn, app_config, virt_imei_range_start, virt_imei_range_end, curr_date=curr_date) for d in self.dimensions ] # Calculate the SQL for the intersection of all dimensions condition_sql = sql.SQL(' INTERSECT ').join(dims_sql) # If sticky, we need to UNION the sql with the currently selected IMEIs if self.config.sticky: condition_sql = sql.SQL("""SELECT imei_norm FROM classification_state WHERE cond_name = {cond_name} AND virt_imei_shard >= {virt_imei_range_start} AND virt_imei_shard < {virt_imei_range_end} AND end_date IS NULL UNION ALL {cond_results_sql} """).format( cond_name=sql.Literal(self.label), virt_imei_range_start=sql.Literal(virt_imei_range_start), virt_imei_range_end=sql.Literal(virt_imei_range_end), cond_results_sql=condition_sql) # Make sure we only get distinct IMEIs condition_sql = sql.SQL( """SELECT imei_norm, calc_virt_imei_shard(imei_norm) AS virt_imei_shard FROM ({0}) non_distinct GROUP BY imei_norm""").format( condition_sql) # Copy results to the temp table tbl_name = partition_utils.imei_shard_name( base_name=self.intermediate_tbl_name(run_id), virt_imei_range_start=virt_imei_range_start, virt_imei_range_end=virt_imei_range_end) cursor.execute( sql.SQL( """INSERT INTO {intermediate_tbl}(imei_norm, virt_imei_shard) {condition_sql}""" ).format(intermediate_tbl=sql.Identifier(tbl_name), condition_sql=condition_sql)) matching_imeis_count = cursor.rowcount return matching_imeis_count, cp.duration
def _time_component_perf(self, key, fn, *fnargs, **fnkwargs): """Helper function to time the performance of a function.""" component_key = '{0}import_time.components.{1}'.format( self._metrics_import_root, key) perf_metadata_path = '{{performance_timing,{0}_start}}'.format(key) metadata.add_time_metadata(self._metadata_conn, 'dirbs-import', self.import_id, perf_metadata_path) with CodeProfiler() as cp: rv = fn(*fnargs, **fnkwargs) self._statsd.gauge(component_key, cp.duration) perf_metadata_path = '{{performance_timing, {0}_end}}'.format(key) metadata.add_time_metadata(self._metadata_conn, 'dirbs-import', self.import_id, perf_metadata_path) return rv
def _update_classification_state_job(self, app_config, run_id, curr_date, virt_imei_range_start, virt_imei_range_end): """Function to update the classificate_state table with IMEIs that are met by this condition (single job).""" with create_db_connection(app_config.db_config) as conn, conn.cursor( ) as cursor, CodeProfiler() as cp: src_shard_name = partition_utils.imei_shard_name( base_name=self.intermediate_tbl_name(run_id), virt_imei_range_start=virt_imei_range_start, virt_imei_range_end=virt_imei_range_end) # Add index on imei_norm indices = [ partition_utils.IndexMetadatum(idx_cols=['imei_norm'], is_unique=True) ] partition_utils.add_indices(conn, tbl_name=src_shard_name, idx_metadata=indices) # Analyze table for better stats/plans cursor.execute( sql.SQL('ANALYZE {0}').format(sql.Identifier(src_shard_name))) # Calculate block date if curr_date is None: curr_date = datetime.date.today() in_amnesty_eval_period, in_amnesty_period = compute_amnesty_flags( app_config, curr_date) # If condition is blocking and is not eligible for amnesty, then compute block_date. # The block_date is set to NULL for amnesty_eligible condition within the eval period. amnesty_eligible = self.config.amnesty_eligible sticky_block_date = curr_date + datetime.timedelta(days=self.config.grace_period) \ if self.config.blocking and not (amnesty_eligible and in_amnesty_eval_period) else None # If the condition's amnesty_eligible flag changed while in eval period, then make sure we update # the amnesty_granted column in the classification_state table for existing IMEIs meeting that condition. # These rows will be selected by the existing WHERE clause filters as the block_date would change # from being NULL to not-NULL or vice-versa. set_amnesty_granted_column = sql.SQL(', amnesty_granted = {0}').format(sql.Literal(amnesty_eligible)) \ if in_amnesty_eval_period else sql.SQL('') # If in amnesty period, update the block_date for IMEIs that were previously classified # as amnesty eligible. This filter is to select amnesty_granted IMEIs with not-NULL block date. # This is to make sure if the amnesty_end_date was updated, we update the block_date too. amnesty_block_date_filter = sql.SQL( 'OR cs.amnesty_granted = TRUE' ) if in_amnesty_period else sql.SQL('') dest_shard_name = partition_utils.imei_shard_name( base_name='classification_state', virt_imei_range_start=virt_imei_range_start, virt_imei_range_end=virt_imei_range_end) # If a condition is blocking, insert new records into state table with not null blocking date or # set a not null blocking date for the existing ones having a null block_date. # Viceversa, if a condition is not blocking, insert new records into state table with Null block_date # or set a Null block_date for the existing ones having a not-null block_date. # Set the amnesty_granted column equal to condition's amnesty_eligible flag when in amnesty eval # period, otherwise always set it to False for new IMEIs meeting the condition. cursor.execute( sql.SQL("""INSERT INTO {dest_shard} AS cs(imei_norm, cond_name, run_id, start_date, end_date, block_date, amnesty_granted, virt_imei_shard) SELECT imei_norm, %s, %s, %s, NULL, %s, %s, calc_virt_imei_shard(imei_norm) FROM {src_shard} ON CONFLICT (imei_norm, cond_name) WHERE end_date IS NULL DO UPDATE SET block_date = CASE WHEN cs.amnesty_granted = TRUE AND NOT {in_eval_period} THEN {amnesty_end_date} ELSE {sticky_block_date} END {set_amnesty_granted_column} WHERE (cs.block_date IS NULL AND excluded.block_date IS NOT NULL) OR (cs.block_date IS NOT NULL AND excluded.block_date IS NULL) {amnesty_block_date_filter}""" ) # noqa Q441 .format(src_shard=sql.Identifier(src_shard_name), dest_shard=sql.Identifier(dest_shard_name), in_eval_period=sql.Literal(in_amnesty_eval_period), set_amnesty_granted_column=set_amnesty_granted_column, amnesty_block_date_filter=amnesty_block_date_filter, amnesty_end_date=sql.Literal( app_config.amnesty_config.amnesty_period_end_date), sticky_block_date=sql.Literal(sticky_block_date)), [ self.label, run_id, curr_date, sticky_block_date, (amnesty_eligible and in_amnesty_eval_period) ]) # Get rid of records that no longer exist in the matched IMEIs list cursor.execute( sql.SQL("""UPDATE {dest_shard} dst SET end_date = %s WHERE cond_name = %s AND end_date IS NULL AND NOT EXISTS (SELECT imei_norm FROM {src_shard} WHERE imei_norm = dst.imei_norm)""" ).format(src_shard=sql.Identifier(src_shard_name), dest_shard=sql.Identifier(dest_shard_name)), [curr_date, self.label]) return cp.duration