def _remember_failure(
    self, connection, class_, duration, exc_type, exc_value, exc_tb,
):
    exc_traceback = ''.join(traceback.format_tb(exc_tb))
    app_name = class_.app_name
    execute_no_results(
        connection,
        """INSERT INTO crontabber_log (
            app_name,
            duration,
            exc_type,
            exc_value,
            exc_traceback
        ) VALUES (
            %s,
            %s,
            %s,
            %s,
            %s
        )""",
        (app_name, '%.5f' % duration, repr(exc_type), repr(exc_value), exc_traceback),
    )
    metrics.gauge('job_failure_runtime', value=duration, tags=['job:%s' % app_name])
def _queue_priorty_job_transaction(self, connection, crash_id,
                                   candidate_processor_iter):
    """this method implements a transaction, inserting a crash to both
    the 'jobs' table (via the '_queue_standard_job_transaction' method)
    and the 'priority_jobs_XXX' table associated with the target
    processor"""
    assigned_processor = self._queue_standard_job_transaction(
        connection,
        crash_id,
        candidate_processor_iter
    )
    if assigned_processor is None:
        return None
    execute_no_results(
        connection,
        "insert into priority_jobs_%d (uuid) values (%%s)"
        % assigned_processor,
        (crash_id,)
    )
    execute_no_results(
        connection,
        "delete from priorityjobs where uuid = %s",
        (crash_id,)
    )
    return assigned_processor
def _remember_success(
    self, connection, class_, success_date, duration,
):
    app_name = class_.app_name
    execute_no_results(
        connection,
        """INSERT INTO cron_log (
            app_name,
            success,
            duration,
            log_time
        ) VALUES (
            %s,
            %s,
            %s,
            %s
        )""",
        (app_name, success_date, '%.5f' % duration, utc_now()),
    )
    metrics.gauge('job_success_runtime', value=duration, tags=['job:%s' % app_name])
def update_crashstats_signature(self, connection, signature, report_date, report_build):
    # Look for the data in the db. If it's there, do an update. If it's
    # not there, do an insert.
    try:
        sig = single_row_sql(
            connection,
            """
            SELECT signature, first_build, first_date
            FROM crashstats_signature
            WHERE signature=%s
            """,
            (signature,)
        )
        sql = """
        UPDATE crashstats_signature SET
            first_build=%s,
            first_date=%s
        WHERE signature=%s
        """
        params = (
            min(sig[1], int(report_build)),
            min(sig[2], string_to_datetime(report_date)),
            sig[0]
        )
    except SQLDidNotReturnSingleRow:
        sql = """
        INSERT INTO crashstats_signature (signature, first_build, first_date)
        VALUES (%s, %s, %s)
        """
        params = (signature, report_build, report_date)
    execute_no_results(connection, sql, params)
def _database_transaction(
    self, connection, raw_adi_logs_pathname, target_date
):
    with codecs.open(raw_adi_logs_pathname, 'r', 'utf-8') as f:
        pgcursor = connection.cursor()
        pgcursor.copy_from(
            f,
            'raw_adi_logs',
            null='None',
            columns=[
                'report_date',
                'product_name',
                'product_os_platform',
                'product_os_version',
                'product_version',
                'build',
                'build_channel',
                'product_guid',
                'count'
            ]
        )
        pgcursor.execute(_RAW_ADI_QUERY, (target_date,))
    # for Bug 1159993
    execute_no_results(connection, _FENNEC38_ADI_CHANNEL_CORRECTION_SQL)
def _prioritize_previously_enqueued_jobs_transaction(self, connection, crash_id):
    """priority jobs come into the system at random times.  A given
    crash_id may already be queued for processing when a priority request
    comes in for it.  To avoid repeating processing, a priority crash_id
    is checked to see if it is already queued.  If it is, the processor
    already assigned to it is told to expedite processing.  This is done
    just by entering the crash_id into the processor's private
    'priority_jobs_XXX' table."""
    try:
        job_owner = single_value_sql(
            connection,
            "select owner from jobs where uuid = %s",
            (crash_id,)
        )
    except SQLDidNotReturnSingleValue:
        return False
    priority_job_table_name = 'priority_jobs_%d' % job_owner
    self.config.logger.debug(
        "priority job %s was already in the queue, assigned to %d",
        crash_id,
        job_owner
    )
    try:
        # detect if the found job was assigned to a processor that is
        # dead by checking to see if the priority jobs table exists or
        # not.  If it doesn't exist, wait for the job to get reassigned
        # to a live processor.  In the future, it may be better to just
        # reassign the job immediately.
        single_value_sql(  # return value intentionally ignored
            connection,
            "select 1 from pg_stat_user_tables where relname = %s",
            (priority_job_table_name,)
        )
    except SQLDidNotReturnSingleValue:
        self.config.logger.debug(
            "%s assigned to dead processor %d - "
            "wait for reassignment",
            crash_id,
            job_owner
        )
        # likely that the job is assigned to a dead processor
        # skip processing it this time around - by next time
        # hopefully it will have been reassigned to a live processor
        return False
    execute_no_results(
        connection,
        "insert into %s (uuid) values (%%s)" % priority_job_table_name,
        (crash_id,)
    )
    execute_no_results(
        connection,
        "delete from priorityjobs where uuid = %s",
        (crash_id,)
    )
    return True
def _create_priority_jobs(self, connection):
    self.processor_id = single_value_sql(
        connection,
        "select id from processors where name = %s",
        (self.processor_name,)
    )
    priority_jobs_table_name = "priority_jobs_%d" % self.processor_id
    execute_no_results(
        connection,
        "drop table if exists %s" % priority_jobs_table_name
    )
    execute_no_results(
        connection,
        "create table %s (uuid varchar(50) not null primary key)"
        % priority_jobs_table_name
    )
    self.config.logger.info(
        "created priority jobs table: %s", priority_jobs_table_name
    )
    return priority_jobs_table_name
def _clean_jobs_table_transaction(self, connection):
    """go through the jobs table and remove jobs that are complete"""
    self.config.logger.debug("removing completed jobs from queue")
    self.config.logger.debug("starting deletion")
    execute_no_results(
        connection,
        "delete from jobs "
        "where"
        "    uuid in (select "
        "                 uuid"
        "             from"
        "                 jobs j"
        "             where"
        "                 j.success is not null)"
    )
def _kick_stalled_jobs_transaction(self, connection):
    """try to restart stalled jobs by changing the starteddatetime to
    NULL.  This should get the attention of the assigned processor"""
    self.config.logger.debug("restart stalled jobs in queue")
    execute_no_results(
        connection,
        "update jobs "
        "    set starteddatetime = NULL "
        "where"
        "    success is NULL"
        "    and completeddatetime is NULL"
        "    and starteddatetime < now() - %s - %s",
        (self.config.registrar.check_in_frequency,
         self.config.registrar.processor_grace_period)
    )
def test_execute_no_results(self):
    m_execute = Mock()
    m_cursor = Mock()
    m_cursor.execute = m_execute
    conn = Mock()
    conn.cursor.return_value = m_cursor
    dbapi2_util.execute_no_results(
        conn,
        "insert into table (a, b, c) values (%s, %s, %s)",
        (1, 2, 3)
    )
    eq_(conn.cursor.call_count, 1)
    eq_(m_cursor.execute.call_count, 1)
    m_cursor.execute.assert_called_once_with(
        "insert into table (a, b, c) values (%s, %s, %s)",
        (1, 2, 3)
    )
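The test above pins down the helper's whole contract: it asks the connection for a cursor and hands the SQL and parameters straight to cursor.execute(), fetching nothing back. A minimal sketch of such a helper, inferred only from that test; the real dbapi2_util module may add cursor cleanup or error wrapping:

def execute_no_results(connection, sql, parameters=None):
    # Run a statement for its side effects only; nothing is fetched.
    # Sketch inferred from the unit test above, not the project's
    # actual implementation.
    cursor = connection.cursor()
    cursor.execute(sql, parameters)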
def run(self, connection, date):
    target_date = (date - datetime.timedelta(days=1)).strftime('%Y-%m-%d')

    raw_adi_logs_pathname = os.path.join(
        tempfile.gettempdir(),
        "%s.raw_adi_logs.TEMPORARY%s" % (target_date, '.txt')
    )
    try:
        with codecs.open(raw_adi_logs_pathname, 'w', 'utf-8') as f:
            hive = pyhs2.connect(
                host=self.config.hive_host,
                port=self.config.hive_port,
                authMechanism=self.config.hive_auth_mechanism,
                user=self.config.hive_user,
                password=self.config.hive_password,
                database=self.config.hive_database,
                # the underlying TSocket setTimeout() wants milliseconds
                timeout=self.config.timeout * 1000
            )

            cur = hive.cursor()
            query = self.config.query % target_date
            cur.execute(query)
            for row in cur:
                if None in row:
                    continue
                f.write(
                    "\t".join(
                        self.remove_control_characters(urllib2.unquote(v))
                        .replace('\\', '\\\\')
                        if isinstance(v, basestring) else str(v)
                        for v in row
                    )
                )
                f.write("\n")

        with codecs.open(raw_adi_logs_pathname, 'r', 'utf-8') as f:
            pgcursor = connection.cursor()
            pgcursor.copy_from(
                f,
                'raw_adi_logs',
                null='None',
                columns=[
                    'report_date',
                    'product_name',
                    'product_os_platform',
                    'product_os_version',
                    'product_version',
                    'build',
                    'build_channel',
                    'product_guid',
                    'count'
                ]
            )
            pgcursor.execute(_RAW_ADI_QUERY, (target_date,))

            # for Bug 1159993
            execute_no_results(connection, _FENNEC38_ADI_CHANNEL_CORRECTION_SQL)
    finally:
        if os.path.isfile(raw_adi_logs_pathname):
            os.remove(raw_adi_logs_pathname)
def _save_processed_transaction(self, connection, processed_crash):
    report_id = self._save_processed_report(connection, processed_crash)
    self._save_plugins(connection, processed_crash, report_id)
    self._save_extensions(connection, processed_crash, report_id)

    crash_id = processed_crash['uuid']
    processed_crashes_table_name = (
        'processed_crashes_%s' % self._table_suffix_for_crash_id(crash_id)
    )
    insert_sql = """insert into %s (uuid, processed_crash, date_processed)
                    values (%%s, %%s, %%s)""" % processed_crashes_table_name

    savepoint_name = threading.currentThread().getName().replace('-', '')
    value_list = (
        crash_id,
        json.dumps(processed_crash, cls=JsonDTEncoder),
        processed_crash["date_processed"]
    )
    execute_no_results(connection, "savepoint %s" % savepoint_name)
    try:
        execute_no_results(connection, insert_sql, value_list)
        execute_no_results(
            connection,
            "release savepoint %s" % savepoint_name
        )
    except self.config.database_class.IntegrityError:
        # report already exists
        execute_no_results(
            connection,
            "rollback to savepoint %s" % savepoint_name
        )
def _save_processed_report(self, connection, processed_crash):
    column_list = []
    placeholder_list = []
    value_list = []
    for pro_crash_name, report_name in self._reports_table_mappings:
        column_list.append(report_name)
        placeholder_list.append('%s')
        value_list.append(processed_crash[pro_crash_name])
    crash_id = processed_crash['uuid']
    reports_table_name = (
        'reports_%s' % self._table_suffix_for_crash_id(crash_id)
    )
    insert_sql = "insert into %s (%s) values (%s) returning id" % (
        reports_table_name,
        ', '.join(column_list),
        ', '.join(placeholder_list)
    )
    # we want to insert directly into the report table.  There is a
    # chance however that the record already exists.  If it does, then
    # the insert would fail and the connection fall into a "broken" state.
    # To avoid this, we set a savepoint to which we can roll back if the
    # record already exists - essentially a nested transaction.
    # We use the name of the executing thread as the savepoint name.
    # alternatively we could get a uuid.
    savepoint_name = threading.currentThread().getName().replace('-', '')
    execute_no_results(connection, "savepoint %s" % savepoint_name)
    try:
        report_id = single_value_sql(connection, insert_sql, value_list)
        execute_no_results(
            connection,
            "release savepoint %s" % savepoint_name
        )
    except self.config.database_class.IntegrityError:
        # report already exists
        execute_no_results(
            connection,
            "rollback to savepoint %s" % savepoint_name
        )
        execute_no_results(
            connection,
            "release savepoint %s" % savepoint_name
        )
        execute_no_results(
            connection,
            "delete from %s where uuid = %%s" % reports_table_name,
            (processed_crash.uuid,)
        )
        report_id = single_value_sql(connection, insert_sql, value_list)
    return report_id
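The comment in the method above describes the savepoint trick in general terms. Here is a standalone sketch of the same pattern against plain psycopg2, with an invented 'reports' table and column names, so the control flow is visible without the project's helpers:

import psycopg2

def insert_or_replace(connection, uuid, payload):
    # Hypothetical illustration of the savepoint/rollback pattern used
    # above; the table and columns are made up.
    cursor = connection.cursor()
    cursor.execute("SAVEPOINT sp_insert")  # start a nested transaction
    try:
        cursor.execute(
            "INSERT INTO reports (uuid, payload) VALUES (%s, %s)",
            (uuid, payload)
        )
        cursor.execute("RELEASE SAVEPOINT sp_insert")
    except psycopg2.IntegrityError:
        # the row already exists: roll back only the failed INSERT so the
        # enclosing transaction stays usable, then replace the old row
        cursor.execute("ROLLBACK TO SAVEPOINT sp_insert")
        cursor.execute("DELETE FROM reports WHERE uuid = %s", (uuid,))
        cursor.execute(
            "INSERT INTO reports (uuid, payload) VALUES (%s, %s)",
            (uuid, payload)
        )

Rolling back to the savepoint rather than the whole transaction is what keeps the rest of the crash-report work in the enclosing transaction committable.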
def _remember_success(self, connection, class_, success_date, duration):
    app_name = class_.app_name
    execute_no_results(
        connection,
        """INSERT INTO crontabber_log (
            app_name,
            success,
            duration
        ) VALUES (
            %s,
            %s,
            %s
        )""",
        (app_name, success_date, '%.5f' % duration)
    )
def __delitem__(self, connection, key):
    """remove the item by key or raise KeyError"""
    try:
        # result intentionally ignored
        single_value_sql(
            connection,
            """SELECT app_name
               FROM crontabber
               WHERE app_name = %s""",
            (key,)
        )
    except SQLDidNotReturnSingleValue:
        raise KeyError(key)
    # item exists
    execute_no_results(
        connection,
        """DELETE FROM crontabber
           WHERE app_name = %s""",
        (key,)
    )
def run(self, connection):
    select_sql = """
        SELECT crash_id FROM reprocessing_jobs LIMIT 10000
    """
    crash_ids = []
    for (crash_id,) in execute_query_iter(connection, select_sql):
        crash_ids.append(crash_id)

    delete_sql = """
        DELETE from reprocessing_jobs WHERE crash_id = %(crash_id)s
    """

    for crash_id in crash_ids:
        self.queuing_connection_factory.save_raw_crash(
            DotDict({"legacy_processing": 0}),
            [],
            crash_id
        )
        execute_no_results(connection, delete_sql, {"crash_id": crash_id})
        connection.commit()
def _queue_standard_job_transaction(self, connection, crash_id,
                                    candidate_processor_iter):
    """this method implements a single transaction, inserting a crash
    into the 'jobs' table.  Because the jobs table contains a non-NULL
    foreign key reference to the 'processors' table, the act of
    insertion is also the act of assigning the crash to a processor."""
    #self.config.logger.debug("trying to insert %s", crash_id)
    processor_id, processor_name = candidate_processor_iter.next()
    if processor_id is None:
        return None
    execute_no_results(
        connection,
        "insert into jobs (pathname, uuid, owner, priority,"
        " queuedDateTime) "
        "values (%s, %s, %s, %s, %s)",
        ('', crash_id, processor_id, 1, utc_now())
    )
    self.config.logger.info(
        "%s assigned to processor %s (%d)",
        crash_id,
        processor_name,
        processor_id
    )
    return processor_id
def _save_processed_crash(self, connection, processed_crash):
    crash_id = processed_crash['uuid']
    processed_crashes_table_name = (
        'processed_crashes_%s' % self._table_suffix_for_crash_id(crash_id)
    )
    upsert_sql = """
    WITH
    update_processed_crash AS (
        UPDATE %(table)s SET
            processed_crash = %%(processed_json)s,
            date_processed = %%(date_processed)s
        WHERE uuid = %%(uuid)s
        RETURNING 1
    ),
    insert_processed_crash AS (
        INSERT INTO %(table)s (uuid, processed_crash, date_processed)
        ( SELECT
            %%(uuid)s as uuid,
            %%(processed_json)s as processed_crash,
            %%(date_processed)s as date_processed
            WHERE NOT EXISTS (
                SELECT uuid from %(table)s
                WHERE
                    uuid = %%(uuid)s
                LIMIT 1
            )
        )
        RETURNING 2
    )
    SELECT * from update_processed_crash
    UNION ALL
    SELECT * from insert_processed_crash
    """ % {
        'table': processed_crashes_table_name,
        'uuid': crash_id
    }

    values = {
        'processed_json': json.dumps(processed_crash, cls=JsonDTEncoder),
        'date_processed': processed_crash["date_processed"],
        'uuid': crash_id
    }
    execute_no_results(connection, upsert_sql, values)
def _save_raw_crash_transaction(self, connection, raw_crash, crash_id):
    raw_crash_table_name = (
        'raw_crashes_%s' % self._table_suffix_for_crash_id(crash_id)
    )
    upsert_sql = """
    WITH
    update_raw_crash AS (
        UPDATE %(table)s SET
            raw_crash = %%(raw_crash)s,
            date_processed = %%(date_processed)s
        WHERE uuid = %%(crash_id)s
        RETURNING 1
    ),
    insert_raw_crash AS (
        INSERT into %(table)s (uuid, raw_crash, date_processed)
        ( SELECT
            %%(crash_id)s as uuid,
            %%(raw_crash)s as raw_crash,
            %%(date_processed)s as date_processed
            WHERE NOT EXISTS (
                SELECT uuid from %(table)s
                WHERE
                    uuid = %%(crash_id)s
                LIMIT 1
            )
        )
        RETURNING 2
    )
    SELECT * from update_raw_crash
    UNION ALL
    SELECT * from insert_raw_crash
    """ % {'table': raw_crash_table_name}

    values = {
        'crash_id': crash_id,
        'raw_crash': json.dumps(raw_crash),
        'date_processed': raw_crash["submitted_timestamp"]
    }
    execute_no_results(connection, upsert_sql, values)
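Both upsert methods above lean on the same writable-CTE idiom, which predates PostgreSQL's INSERT ... ON CONFLICT: the UPDATE arm touches the row if it already exists, and the INSERT arm only fires when its NOT EXISTS probe finds nothing. A stripped-down sketch with an invented example_crashes table (not the project's schema):

# Hypothetical table and columns, for illustration only.
UPSERT_SQL = """
WITH
update_row AS (
    UPDATE example_crashes SET
        payload = %(payload)s
    WHERE uuid = %(uuid)s
    RETURNING 1
),
insert_row AS (
    INSERT INTO example_crashes (uuid, payload)
    SELECT %(uuid)s, %(payload)s
    WHERE NOT EXISTS (
        SELECT 1 FROM example_crashes WHERE uuid = %(uuid)s
    )
    RETURNING 2
)
SELECT * FROM update_row
UNION ALL
SELECT * FROM insert_row
"""

def upsert_example(connection, uuid, payload):
    # execute_no_results is the same helper exercised throughout this page
    execute_no_results(connection, UPSERT_SQL, {'uuid': uuid, 'payload': payload})

The trailing SELECT ... UNION ALL returns a 1 or a 2 depending on which arm fired, which can be handy for debugging; the helper here simply ignores it.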
def _save_extensions(self, connection, processed_crash, report_id):
    extensions = processed_crash['addons']
    if not extensions:
        return
    crash_id = processed_crash['uuid']
    table_suffix = self._table_suffix_for_crash_id(crash_id)
    extensions_table_name = 'extensions_%s' % table_suffix
    extensions_insert_sql = (
        "insert into %s "
        "    (report_id, date_processed, extension_key, extension_id, "
        "     extension_version)"
        "values (%%s, %%s, %%s, %%s, %%s)" % extensions_table_name
    )
    # why are we deleting first?  This might be a reprocessing job and
    # the extensions data might already be in the table: a straight insert
    # might fail.  Why not check to see if there is data already there
    # and then just not insert if data is there?  We may be reprocessing
    # to deal with missing extensions data, so just because there is
    # already data there doesn't mean that we can skip this.
    # What about using "upsert" sql - that would be fine and result in one
    # fewer round trip between client and database, but "upsert" sql is
    # opaque and not easy to understand at a glance.  This was faster to
    # implement.  What about using "transaction check points"?
    # too many round trips between the client and the server.
    clear_extensions_sql = (
        "delete from %s where report_id = %%s" % extensions_table_name
    )
    execute_no_results(connection, clear_extensions_sql, (report_id,))
    for i, x in enumerate(extensions):
        try:
            execute_no_results(
                connection,
                extensions_insert_sql,
                (report_id, processed_crash['date_processed'], i, x[0][:100], x[1])
            )
        except IndexError:
            self.config.logger.warning(
                '"%s" is deficient as a name and version for an addon',
                str(x[0])
            )
def _insert_test_data(self, connection):
    # clear old data, just in case
    execute_no_results(
        connection,
        "TRUNCATE bug_associations, bugs CASCADE"
    )
    # Insert data
    execute_no_results(
        connection,
        "INSERT INTO bugs VALUES (1),(2),(3),(4)"
    )
    execute_no_results(
        connection,
        """
        INSERT INTO bug_associations (signature, bug_id)
        VALUES
        ( 'sign1', 1 ),
        ( 'js', 1 ),
        ( 'mysignature', 2 ),
        ( 'mysignature', 3 );
        """
    )
def inner_transaction(self, connection, bug_id, signature_set):
    self.config.logger.debug("bug %s: %s", bug_id, signature_set)
    if not signature_set:
        execute_no_results(
            connection,
            "DELETE FROM bug_associations WHERE bug_id = %s",
            (bug_id,)
        )
        return
    try:
        signature_rows = execute_query_fetchall(
            connection,
            "SELECT signature FROM bug_associations WHERE bug_id = %s",
            (bug_id,)
        )
        signatures_db = [x[0] for x in signature_rows]
        for signature in signatures_db:
            if signature not in signature_set:
                execute_no_results(
                    connection,
                    """
                    DELETE FROM bug_associations
                    WHERE signature = %s and bug_id = %s""",
                    (signature, bug_id)
                )
                self.config.logger.info(
                    'association removed: %s - "%s"', bug_id, signature
                )
    except SQLDidNotReturnSingleRow:
        signatures_db = []

    for signature in signature_set:
        if signature not in signatures_db:
            execute_no_results(
                connection,
                """
                INSERT INTO bug_associations (signature, bug_id)
                VALUES (%s, %s)""",
                (signature, bug_id)
            )
            self.config.logger.info(
                'new association: %s - "%s"', bug_id, signature
            )
def update_bug_data(self, connection, bug_id, signature_set):
    self.config.logger.debug('bug %s: %s', bug_id, signature_set)
    # If there's no associated signatures, delete everything for this bug id
    if not signature_set:
        execute_no_results(
            connection,
            """
            DELETE FROM crashstats_bugassociation WHERE bug_id = %s
            """,
            (bug_id,)
        )
        return

    try:
        signature_rows = execute_query_fetchall(
            connection,
            """
            SELECT signature FROM crashstats_bugassociation WHERE bug_id = %s
            """,
            (bug_id,)
        )
        signatures_db = [x[0] for x in signature_rows]
        for signature in signatures_db:
            if signature not in signature_set:
                execute_no_results(
                    connection,
                    """
                    DELETE FROM crashstats_bugassociation
                    WHERE signature = %s and bug_id = %s
                    """,
                    (signature, bug_id)
                )
                self.config.logger.info(
                    'association removed: %s - "%s"', bug_id, signature
                )
    except SQLDidNotReturnSingleRow:
        signatures_db = []

    for signature in signature_set:
        if signature not in signatures_db:
            execute_no_results(
                connection,
                """
                INSERT INTO crashstats_bugassociation (signature, bug_id)
                VALUES (%s, %s)
                """,
                (signature, bug_id)
            )
            self.config.logger.info(
                'association added: %s - "%s"', bug_id, signature
            )
def truncate_transaction(connection):
    execute_no_results(
        connection,
        "TRUNCATE bug_associations, bugs CASCADE"
    )
def setUp(self):
    super(IntegrationTestFeaturedVersionsSync, self).setUp()
    self.__truncate()

    now = utc_now()
    build_date = now - datetime.timedelta(days=30)
    sunset_date = now + datetime.timedelta(days=30)

    execute_no_results(
        self.conn,
        """
        INSERT INTO products (product_name, sort, release_name)
        VALUES
        ('Firefox', 1, 'firefox'),
        ('Fennec', 1, 'mobile')
        """
    )
    execute_no_results(
        self.conn,
        """
        INSERT INTO product_versions
        (product_version_id, product_name, major_version, release_version,
         version_string, version_sort, build_date, sunset_date,
         featured_version, build_type)
        VALUES
        (1, 'Firefox', '15.0', '15.0', '15.0a1', '000000150a1',
         %(build_date)s, %(sunset_date)s, true, 'release'),
        (2, 'Firefox', '24.5', '24.5.0', '24.5.0', '024005000x000',
         %(build_date)s, %(sunset_date)s, false, 'nightly')
        """,
        {
            'build_date': build_date,
            'sunset_date': sunset_date
        }
    )
    execute_no_results(
        self.conn,
        """
        INSERT INTO release_channels (release_channel, sort)
        VALUES
        ('nightly', 1),
        ('aurora', 2),
        ('beta', 3),
        ('release', 4)
        """
    )
    execute_no_results(
        self.conn,
        """
        INSERT INTO product_release_channels
        (product_name, release_channel, throttle)
        VALUES
        ('Firefox', 'nightly', 1),
        ('Firefox', 'aurora', 1),
        ('Firefox', 'beta', 1),
        ('Firefox', 'release', 1),
        ('Fennec', 'release', 1),
        ('Fennec', 'beta', 1)
        """
    )
def _sweep_dead_processors_transaction(self, connection):
    """this function is a single database transaction: look for dead
    processors - find all the jobs of dead processors and assign them to
    live processors, then delete the dead processor registrations"""
    self.config.logger.info("looking for dead processors")
    try:
        self.config.logger.info(
            "threshold %s", self.config.registrar.check_in_frequency
        )
        threshold = single_value_sql(
            connection,
            "select now() - %s - %s",
            (self.config.registrar.processor_grace_period,
             self.config.registrar.check_in_frequency)
        )
        dead_processors = execute_query_fetchall(
            connection,
            "select id from processors where lastSeenDateTime < %s",
            (threshold,)
        )
        if dead_processors:
            self.config.logger.info("found dead processor(s):")
            for a_dead_processor in dead_processors:
                self.config.logger.info("%d is dead", a_dead_processor[0])

            self.config.logger.debug("getting list of live processor(s):")
            live_processors = execute_query_fetchall(
                connection,
                "select id from processors where lastSeenDateTime >= %s",
                (threshold,)
            )
            if not live_processors:
                if self.config.registrar.quit_if_no_processors:
                    raise NoProcessorsRegisteredError(
                        "There are no processors registered"
                    )
                else:
                    self.config.logger.critical(
                        'There are no live processors, nothing to do. '
                        'Waiting for processors to come on line.'
                    )
                    return
            number_of_live_processors = len(live_processors)

            self.config.logger.debug(
                "getting range of queued date for jobs associated with "
                "dead processor(s):"
            )
            dead_processor_ids_str = ", ".join(
                [str(x[0]) for x in dead_processors]
            )
            earliest_dead_job, latest_dead_job = single_row_sql(
                connection,
                "select min(queueddatetime), max(queueddatetime) from jobs "
                "where owner in (%s)" % dead_processor_ids_str
            )
            # take dead processor jobs and reallocate them to live
            # processors in equal sized chunks
            if (earliest_dead_job is not None and
                    latest_dead_job is not None):
                time_increment = (
                    (latest_dead_job - earliest_dead_job)
                    / number_of_live_processors
                )
                for x, live_processor_id in enumerate(live_processors):
                    low_queued_time = (
                        x * time_increment + earliest_dead_job
                    )
                    high_queued_time = (
                        (x + 1) * time_increment + earliest_dead_job
                    )
                    self.config.logger.info(
                        "assigning jobs from %s to %s to processor %s:",
                        low_queued_time,
                        high_queued_time,
                        live_processor_id
                    )
                    # why is the range >= at both ends?  the range must be
                    # inclusive, the risk of moving a job twice is low and
                    # consequences low, too.
                    # 1st step: take any jobs of a dead processor that were
                    # in progress and reset them to unprocessed
                    execute_no_results(
                        connection,
                        "update jobs set"
                        "    starteddatetime = NULL "
                        "where"
                        "    %%s >= queueddatetime"
                        "    and queueddatetime >= %%s"
                        "    and owner in (%s)"
                        "    and success is NULL" % dead_processor_ids_str,
                        (high_queued_time, low_queued_time)
                    )
                    # 2nd step: take all jobs of a dead processor and give
                    # them to a new owner
                    execute_no_results(
                        connection,
                        "update jobs"
                        "    set owner = %%s "
                        "where"
                        "    %%s >= queueddatetime"
                        "    and queueddatetime >= %%s"
                        "    and owner in (%s)" % dead_processor_ids_str,
                        (live_processor_id, high_queued_time, low_queued_time)
                    )

            # transfer stalled priority jobs to new processors
            for dead_processor_tuple in dead_processors:
                self.config.logger.info(
                    "re-assigning priority jobs from processor %d:",
                    dead_processor_tuple[0]
                )
                execute_no_results(
                    connection,
                    "insert into priorityjobs (uuid) select uuid "
                    "from priority_jobs_%d" % dead_processor_tuple
                )

            self.config.logger.info("removing all dead processors")
            execute_no_results(
                connection,
                "delete from processors where lastSeenDateTime < %s",
                (threshold,)
            )
            # remove dead processors' priority tables
            for a_dead_processor in dead_processors:
                execute_no_results(
                    connection,
                    "drop table if exists priority_jobs_%d"
                    % a_dead_processor[0]
                )
    except NoProcessorsRegisteredError:
        self.quit = True
        self.config.logger.critical('there are no live processors')
def _save_raw_crash_transaction(self, connection, raw_crash, crash_id):
    raw_crash_table_name = (
        'raw_crashes_%s' % self._table_suffix_for_crash_id(crash_id)
    )
    insert_sql = """insert into %s (uuid, raw_crash, date_processed)
                    values (%%s, %%s, %%s)""" % raw_crash_table_name
    savepoint_name = threading.currentThread().getName().replace('-', '')
    value_list = (
        crash_id,
        json.dumps(raw_crash),
        raw_crash["submitted_timestamp"]
    )
    execute_no_results(connection, "savepoint %s" % savepoint_name)
    try:
        execute_no_results(connection, insert_sql, value_list)
        execute_no_results(
            connection,
            "release savepoint %s" % savepoint_name
        )
    except self.config.database_class.IntegrityError:
        # report already exists
        execute_no_results(
            connection,
            "rollback to savepoint %s" % savepoint_name
        )
        execute_no_results(
            connection,
            "release savepoint %s" % savepoint_name
        )
        execute_no_results(
            connection,
            "delete from %s where uuid = %%s" % raw_crash_table_name,
            (crash_id,)
        )
        execute_no_results(connection, insert_sql, value_list)
def __setitem__(self, connection, key, value):
    class LastErrorEncoder(json.JSONEncoder):
        def default(self, obj):
            if isinstance(obj, type):
                return repr(obj)
            return json.JSONEncoder.default(self, obj)

    try:
        single_value_sql(
            connection,
            """SELECT ongoing
               FROM crontabber
               WHERE app_name = %s
               FOR UPDATE NOWAIT
            """,
            (key,)
        )
        # If the above single_value_sql() didn't raise a
        # SQLDidNotReturnSingleValue exception, it means
        # there is a row by this app_name.
        # Therefore, the next SQL is an update.
        next_sql = """
            UPDATE crontabber
            SET
                next_run = %(next_run)s,
                first_run = %(first_run)s,
                last_run = %(last_run)s,
                last_success = %(last_success)s,
                depends_on = %(depends_on)s,
                error_count = %(error_count)s,
                last_error = %(last_error)s,
                ongoing = %(ongoing)s
            WHERE
                app_name = %(app_name)s
        """
    except OperationalError as exception:
        if 'could not obtain lock' in exception.args[0]:
            raise RowLevelLockError(exception.args[0])
        else:
            raise
    except SQLDidNotReturnSingleValue:
        # the key does not exist, do an insert
        next_sql = """
            INSERT INTO crontabber (
                app_name,
                next_run,
                first_run,
                last_run,
                last_success,
                depends_on,
                error_count,
                last_error,
                ongoing
            ) VALUES (
                %(app_name)s,
                %(next_run)s,
                %(first_run)s,
                %(last_run)s,
                %(last_success)s,
                %(depends_on)s,
                %(error_count)s,
                %(last_error)s,
                %(ongoing)s
            )
        """
    parameters = {
        'app_name': key,
        'next_run': value['next_run'],
        'first_run': value['first_run'],
        'last_run': value['last_run'],
        'last_success': value.get('last_success'),
        'depends_on': value['depends_on'],
        'error_count': value['error_count'],
        'last_error': json.dumps(value['last_error'], cls=LastErrorEncoder),
        'ongoing': value.get('ongoing'),
    }
    try:
        execute_no_results(connection, next_sql, parameters)
    except IntegrityError as exception:
        # See CREATE_CRONTABBER_APP_NAME_UNIQUE_INDEX for why
        # we know to look for this mentioned in the error message.
        if 'crontabber_unique_app_name_idx' in exception.args[0]:
            raise RowLevelLockError(exception.args[0])
        raise
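The FOR UPDATE NOWAIT in the __setitem__ above is what turns lock contention into an immediate error instead of a silent wait, which the method then surfaces as RowLevelLockError. A minimal sketch of just that locking behavior against plain psycopg2 (the crontabber table is taken from the code above; the helper name is invented for illustration):

import psycopg2

def try_lock_app_row(connection, app_name):
    # Returns True if this transaction now holds the row lock, False if
    # another transaction already holds it; NOWAIT makes Postgres raise
    # instead of blocking.  Sketch for illustration, not project code.
    cursor = connection.cursor()
    try:
        cursor.execute(
            "SELECT ongoing FROM crontabber"
            " WHERE app_name = %s FOR UPDATE NOWAIT",
            (app_name,)
        )
        return True
    except psycopg2.OperationalError as exc:
        if 'could not obtain lock' in str(exc):
            return False
        raise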
def _save_plugins(self, connection, processed_crash, report_id):
    """ Electrolysis Support - Optional - processed_crash may contain a
    ProcessType of plugin.  In the future this value would be default,
    content, maybe even Jetpack... This indicates which process was the
    crashing process.
        plugin - When set to plugin, the jsonDocument MUST also contain
                 PluginFilename, PluginName, and PluginVersion
    """
    process_type = processed_crash['process_type']
    if not process_type:
        return

    if process_type == "plugin":
        # Bug#543776 We actually are relaxing the non-null policy... a
        # null filename, name, and version is OK.  We'll use empty strings
        try:
            plugin_filename = processed_crash['PluginFilename']
            plugin_name = processed_crash['PluginName']
            plugin_version = processed_crash['PluginVersion']
        except KeyError as x:
            self.config.logger.error(
                'the crash is missing a required field: %s', str(x)
            )
            return
        find_plugin_sql = ('select id from plugins '
                           'where filename = %s '
                           'and name = %s')
        try:
            plugin_id = single_value_sql(connection,
                                         find_plugin_sql,
                                         (plugin_filename,
                                          plugin_name))
        except SQLDidNotReturnSingleValue:
            insert_plugins_sql = ("insert into plugins (filename, name) "
                                  "values (%s, %s) returning id")
            plugin_id = single_value_sql(connection,
                                         insert_plugins_sql,
                                         (plugin_filename,
                                          plugin_name))
        crash_id = processed_crash['uuid']
        table_suffix = self._table_suffix_for_crash_id(crash_id)
        plugin_reports_table_name = 'plugins_reports_%s' % table_suffix

        # why are we deleting first?  This might be a reprocessing job and
        # the plugins_reports data might already be in the table: a
        # straight insert might fail.  Why not check to see if there is
        # data already there and then just not insert if data is there?
        # We may be reprocessing to deal with missing plugin_reports data,
        # so just because there is already data there doesn't mean that we
        # can skip this.  What about using "upsert" sql - that would be fine
        # and result in one fewer round trip between client and database,
        # but "upsert" sql is opaque and not easy to understand at a
        # glance.  This was faster to implement.  What about using
        # "transaction check points"?  Too many round trips between the
        # client and the server.
        plugins_reports_delete_sql = (
            'delete from %s where report_id = %%s'
            % plugin_reports_table_name
        )
        execute_no_results(connection,
                           plugins_reports_delete_sql,
                           (report_id,))

        plugins_reports_insert_sql = (
            'insert into %s '
            '    (report_id, plugin_id, date_processed, version) '
            'values '
            '    (%%s, %%s, %%s, %%s)' % plugin_reports_table_name
        )
        values_tuple = (report_id,
                        plugin_id,
                        processed_crash['date_processed'],
                        plugin_version)
        execute_no_results(connection,
                           plugins_reports_insert_sql,
                           values_tuple)
def inner_transaction(self, connection, bug_id, status, resolution,
                      short_desc, signature_set):
    self.config.logger.debug(
        "bug %s (%s, %s) %s: %s",
        bug_id, status, resolution, short_desc, signature_set
    )
    if not signature_set:
        execute_no_results(
            connection,
            "DELETE FROM bugs WHERE id = %s",
            (bug_id,)
        )
        return
    useful = False
    insert_made = False
    try:
        status_db, resolution_db, short_desc_db = single_row_sql(
            connection,
            """SELECT status, resolution, short_desc
               FROM bugs
               WHERE id = %s""",
            (bug_id,)
        )
        if (status_db != status
                or resolution_db != resolution
                or short_desc_db != short_desc):
            execute_no_results(
                connection,
                """
                UPDATE bugs SET
                    status = %s, resolution = %s, short_desc = %s
                WHERE id = %s""",
                (status, resolution, short_desc, bug_id)
            )
            self.config.logger.info(
                "bug status updated: %s - %s, %s",
                bug_id, status, resolution
            )
            useful = True

        signature_rows = execute_query_fetchall(
            connection,
            "SELECT signature FROM bug_associations WHERE bug_id = %s",
            (bug_id,)
        )
        signatures_db = [x[0] for x in signature_rows]

        for signature in signatures_db:
            if signature not in signature_set:
                execute_no_results(
                    connection,
                    """
                    DELETE FROM bug_associations
                    WHERE signature = %s and bug_id = %s""",
                    (signature, bug_id)
                )
                self.config.logger.info(
                    'association removed: %s - "%s"', bug_id, signature
                )
                useful = True
    except SQLDidNotReturnSingleRow:
        execute_no_results(
            connection,
            """
            INSERT INTO bugs (id, status, resolution, short_desc)
            VALUES (%s, %s, %s, %s)""",
            (bug_id, status, resolution, short_desc)
        )
        insert_made = True
        signatures_db = []

    for signature in signature_set:
        if signature not in signatures_db:
            if self._has_signature_report(signature, connection):
                execute_no_results(
                    connection,
                    """
                    INSERT INTO bug_associations (signature, bug_id)
                    VALUES (%s, %s)""",
                    (signature, bug_id)
                )
                self.config.logger.info(
                    'new association: %s - "%s"', bug_id, signature
                )
                useful = True
            else:
                self.config.logger.info(
                    'rejecting association (no reports with this '
                    'signature): %s - "%s"', bug_id, signature
                )

    if useful:
        if insert_made:
            self.config.logger.info(
                'new bug: %s - %s, %s, "%s"',
                bug_id, status, resolution, short_desc
            )
    else:
        if insert_made:
            self.config.logger.info(
                'rejecting bug (no useful information): '
                '%s - %s, %s, "%s"',
                bug_id, status, resolution, short_desc
            )
        else:
            self.config.logger.info(
                'skipping bug (no new information): '
                '%s - %s, %s, "%s"',
                bug_id, status, resolution, short_desc
            )
        raise NothingUsefulHappened('nothing useful done')
def setUp(self):
    super(IntegrationTestFeaturedVersionsAutomatic, self).setUp()
    self._truncate_product_info()

    now = utc_now()
    build_date = now - datetime.timedelta(days=30)
    sunset_date = now + datetime.timedelta(days=30)

    execute_no_results(
        self.conn,
        """
        INSERT INTO products (product_name, sort, release_name)
        VALUES
        ('Firefox', 1, 'firefox'),
        ('Fennec', 1, 'mobile')
        """
    )
    execute_no_results(
        self.conn,
        """
        INSERT INTO product_versions
        (product_version_id, product_name, major_version, release_version,
         version_string, version_sort, build_date, sunset_date,
         featured_version, build_type)
        VALUES
        (1, 'Firefox', '15.0', '15.0', '15.0a1', '000000150a1',
         %(build_date)s, %(sunset_date)s, true, 'release'),
        (2, 'Firefox', '24.5', '24.5.0', '24.5.0', '024005000x000',
         %(build_date)s, %(sunset_date)s, true, 'nightly'),
        (3, 'Firefox', '49.0.1', '49.0.1', '49.0.1', '000000150a1',
         %(build_date)s, %(sunset_date)s, false, 'release'),
        (4, 'Firefox', '50.0b', '50.0b', '50.0b', '024005000x000',
         %(build_date)s, %(sunset_date)s, false, 'beta'),
        (5, 'Firefox', '51.0a2', '51.0a2', '51.0a2', '000000150a1',
         %(build_date)s, %(sunset_date)s, false, 'aurora'),
        (6, 'Firefox', '52.0a1', '52.0a1', '52.0a1', '024005000x000',
         %(build_date)s, %(sunset_date)s, false, 'nightly')
        """,
        {
            'build_date': build_date,
            'sunset_date': sunset_date
        }
    )
    execute_no_results(
        self.conn,
        """
        INSERT INTO release_channels (release_channel, sort)
        VALUES
        ('nightly', 1),
        ('aurora', 2),
        ('beta', 3),
        ('release', 4)
        """
    )
    execute_no_results(
        self.conn,
        """
        INSERT INTO product_release_channels
        (product_name, release_channel, throttle)
        VALUES
        ('Firefox', 'nightly', 1),
        ('Firefox', 'aurora', 1),
        ('Firefox', 'beta', 1),
        ('Firefox', 'release', 1),
        ('Fennec', 'release', 1),
        ('Fennec', 'beta', 1)
        """
    )
def __setitem__(self, connection, key, value):
    class LastErrorEncoder(json.JSONEncoder):
        def default(self, obj):
            if isinstance(obj, type):
                return repr(obj)
            return json.JSONEncoder.default(self, obj)

    try:
        single_value_sql(
            connection,
            """SELECT app_name
               FROM crontabber
               WHERE app_name = %s""",
            (key,)
        )
        # the key exists, do an update
        next_sql = """UPDATE crontabber
                      SET
                          next_run = %(next_run)s,
                          first_run = %(first_run)s,
                          last_run = %(last_run)s,
                          last_success = %(last_success)s,
                          depends_on = %(depends_on)s,
                          error_count = %(error_count)s,
                          last_error = %(last_error)s
                      WHERE app_name = %(app_name)s"""
    except SQLDidNotReturnSingleValue:
        # the key does not exist, do an insert
        next_sql = """
            INSERT INTO crontabber (
                app_name,
                next_run,
                first_run,
                last_run,
                last_success,
                depends_on,
                error_count,
                last_error
            ) VALUES (
                %(app_name)s,
                %(next_run)s,
                %(first_run)s,
                %(last_run)s,
                %(last_success)s,
                %(depends_on)s,
                %(error_count)s,
                %(last_error)s
            )"""
    parameters = {
        'app_name': key,
        'next_run': value['next_run'],
        'first_run': value['first_run'],
        'last_run': value['last_run'],
        'last_success': value.get('last_success'),
        'depends_on': value['depends_on'],
        'error_count': value['error_count'],
        'last_error': json.dumps(value['last_error'], cls=LastErrorEncoder)
    }
    execute_no_results(
        connection,
        next_sql,
        parameters
    )