def export_uuids(path, numberofdays):
    """Export crash report uuids from a PostgreSQL database to a CSV file

    path - Directory where the csv file will be created.
    numberofdays - Number of days of crash reports to retrieve, before the
                   most recent crash date.
    """
    database = db.Database(config)
    connection = database.connection()
    cur = connection.cursor()

    # Step 1: pull all distinct crash report dates.
    sql = """
        SELECT DISTINCT to_char(date_processed, 'YYYY-MM-DD') as day
        FROM reports
        ORDER BY day DESC
    """
    if numberofdays:
        sql = "%s LIMIT %s" % (sql, numberofdays)

    print 'Calculating dates... '
    days = db.execute(cur, sql)

    days_list = []
    for day in days:
        days_list.append(day[0])

    store_filename = 'uuids.csv'
    store_filename = os.path.normpath('%s/%s' % (path, store_filename))
    store_file = open(store_filename, 'w')
    store = csv.writer(store_file, delimiter=',', quotechar='"')
    print 'Store file created: %s' % store_filename

    # Step 2: dump the uuids for each day into the CSV.
    for day in days_list:
        date_from = dtu.datetimeFromISOdateString(day)
        date_to = date_from + datetime.timedelta(1)
        sql = "SELECT uuid FROM reports WHERE date_processed BETWEEN %s AND %s"
        print 'Getting crash reports for day %s' % date_from.date()
        crashes_list = db.execute(cur, sql, (date_from, date_to))
        for crash in crashes_list:
            store.writerow(crash)

    store_file.close()
    connection.close()
    return store_filename
def _get_products(self):
    """ Return a list of product names """
    sql_query = """
        /* socorro.external.postgresql.products.Products._get_products */
        SELECT *
        FROM products
        ORDER BY sort
    """

    json_result = {"total": 0, "hits": []}
    default_versions = self.get_default_version()["hits"]

    connection = None
    try:
        connection = self.database.connection()
        cur = connection.cursor()
        results = db.execute(cur, sql_query)
    except psycopg2.Error:
        logger.error("Failed to retrieve products list from PostgreSQL",
                     exc_info=True)
    else:
        for product in results:
            row = dict(zip(("product_name", "sort", "rapid_release_version",
                            "release_name"), product))
            row["default_version"] = default_versions[row["product_name"]]
            json_result["hits"].append(row)
        json_result["total"] = len(json_result["hits"])
        return json_result
    finally:
        # Guarded so a failure inside database.connection() itself does not
        # raise NameError here.
        if connection:
            connection.close()
def dailyUrlDump(config, sdb=sdb,
                 gzipped_csv_files=gzipped_csv_files,
                 IdCache=IdCache,
                 write_row=write_row,
                 process_crash=process_crash,
                 logger=logger):
    dbConnectionPool = sdb.DatabaseConnectionPool(config, logger)
    try:
        try:
            db_conn, db_cursor = dbConnectionPool.connectionCursorPair()
            with gzipped_csv_files(config) as csv_file_handles_tuple:
                headers_not_yet_written = True
                id_cache = IdCache(db_cursor)
                sql_parameters = setup_query_parameters(config)
                logger.debug("config.day = %s; now = %s; yesterday = %s",
                             config.day,
                             sql_parameters.now_str,
                             sql_parameters.yesterday_str)
                sql_query = sql % sql_parameters
                logger.debug("SQL is: %s", sql_query)
                for crash_row in sdb.execute(db_cursor, sql_query):
                    if headers_not_yet_written:
                        write_row(csv_file_handles_tuple,
                                  [x[0] for x in db_cursor.description])
                        headers_not_yet_written = False
                    column_value_list = process_crash(crash_row, id_cache)
                    write_row(csv_file_handles_tuple, column_value_list)
                # end for loop over each crash_row
        finally:
            dbConnectionPool.cleanup()
    except:
        util.reportExceptionAndContinue(logger)
def get(self, **kwargs):
    """Return a job in the priority queue. """
    filters = [
        ("uuid", None, "str"),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    if not params.uuid:
        raise MissingOrBadArgumentError(
            "Mandatory parameter 'uuid' is missing or empty")

    sql = """
        /* socorro.external.postgresql.priorityjobs.Priorityjobs.get */
        SELECT uuid FROM priorityjobs WHERE uuid=%(uuid)s
    """

    json_result = {"total": 0, "hits": []}

    connection = None
    try:
        # Creating the connection to the DB
        connection = self.database.connection()
        cur = connection.cursor()
        results = db.execute(cur, sql, params)
    except psycopg2.Error:
        logger.error("Failed retrieving priorityjobs data from PostgreSQL",
                     exc_info=True)
    else:
        for job in results:
            row = dict(zip(("uuid",), job))
            json_result["hits"].append(row)
        json_result["total"] = len(json_result["hits"])
    finally:
        if connection:
            connection.close()

    return json_result
def fetchCrashHistory(self, parameters):
    if parameters.listOfOs_names and parameters.listOfOs_names != ['']:
        localOsList = [x[0:3] for x in parameters.listOfOs_names]
        osNameListPhrase = ','.join("'%s'" % x for x in localOsList)
        parameters.os_phrase = "os_short_name in (%s)" % osNameListPhrase
    else:
        parameters.os_phrase = '1=1'

    if parameters.report_type == 'crash':
        parameters.report_type_phrase = (
            "report_type = '%s'" % adu_codes.CRASH_BROWSER)
    elif parameters.report_type == 'hang':
        parameters.report_type_phrase = (
            "report_type IN ('%s', '%s')" % (adu_codes.HANG_BROWSER,
                                             adu_codes.HANG_PLUGIN))
    else:
        # Anything other than 'crash' or 'hang' returns all crash types.
        # HANGS_NORMALIZED is deliberately excluded so that hang ids are
        # not counted multiple times.
        parameters.report_type_phrase = (
            "report_type IN ('%s', '%s', '%s', '%s')" % (
                adu_codes.CRASH_BROWSER,
                adu_codes.HANG_PLUGIN,
                adu_codes.CONTENT,
                adu_codes.OOP_PLUGIN,
            ))

    sql = """
        SELECT adu_day::DATE, os_short_name, SUM(count)
        FROM daily_crashes
        WHERE timestamp with time zone %%(start_date)s <= adu_day
        AND adu_day <= timestamp with time zone %%(end_date)s
        AND productdims_id = %%(productdims_id)s
        AND %(os_phrase)s
        AND %(report_type_phrase)s
        GROUP BY adu_day, os_short_name
        order by 1, 2""" % parameters
    #logger.debug('%s', self.connection.cursor().mogrify(
    #    sql.encode(self.connection.encoding), parameters))
    return dict(((bucket, os_name), count)
                for bucket, os_name, count
                in db.execute(self.connection.cursor(), sql, parameters))
def fetchAduHistory(self, parameters):
    if parameters.listOfOs_names and parameters.listOfOs_names != [""]:
        osNameListPhrase = (
            ",".join("'%s'" % x for x in parameters.listOfOs_names)
        ).replace("Mac", "Mac OS X")
        parameters.os_phrase = "and os_name in (%s)" % osNameListPhrase
    else:
        parameters.os_phrase = ""

    sql = """
        select
            adu_date as date,
            substring(os_name, 1, 3) as product_os_platform,
            sum(adu_count)::BIGINT
        from product_adu pa
            join product_info pi using (product_version_id)
        where %%(start_date)s <= adu_date
            and adu_date <= %%(end_date)s
            and pi.product_name = %%(product)s
            and pi.version_string = %%(version)s
            %(os_phrase)s
        group by date, product_os_platform
        order by 1""" % parameters
    # logger.debug('%s', self.connection.cursor().mogrify(
    #     sql.encode(self.connection.encoding), parameters))
    return dict(((date, os_name), count)
                for date, os_name, count
                in db.execute(self.connection.cursor(), sql, parameters))
def _get_products(self):
    """ Return a list of product names """
    sql_query = "SELECT * FROM products"

    json_result = {"total": 0, "hits": []}
    default_versions = self.get_default_version()["hits"]

    connection = None
    try:
        connection = self.database.connection()
        cur = connection.cursor()
        results = db.execute(cur, sql_query)
    except psycopg2.Error:
        logger.error("Failed to retrieve products list from PostgreSQL",
                     exc_info=True)
    else:
        for product in results:
            row = dict(zip(("product_name", "sort", "rapid_release_version",
                            "release_name"), product))
            row["default_version"] = default_versions[row["product_name"]]
            json_result["hits"].append(row)
        json_result["total"] = len(json_result["hits"])
        return json_result
    finally:
        if connection:
            connection.close()
def get(self, **kwargs):
    """Return a single crash report from its UUID. """
    filters = [
        ("uuid", None, "str"),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    # The last six characters of a crash UUID encode its date as YYMMDD.
    day = int(params.uuid[-2:])
    month = int(params.uuid[-4:-2])
    # assuming we won't use this after year 2099
    year = int("20%s" % params.uuid[-6:-4])

    crash_date = datetime.date(year=year, month=month, day=day)
    logger.debug("Looking for crash %s during day %s" % (params.uuid,
                                                         crash_date))

    sql = """/* socorro.external.postgresql.crash.Crash.get */
        SELECT reports.email, reports.url, reports.addons_checked,
        ( SELECT reports_duplicates.duplicate_of
          FROM reports_duplicates
          WHERE reports_duplicates.uuid = reports.uuid
        ) as duplicate_of
        FROM reports
        WHERE reports.uuid=%(uuid)s
        AND reports.success IS NOT NULL
        AND utc_day_is( reports.date_processed, %(crash_date)s)
    """
    sql_params = {
        "uuid": params.uuid,
        "crash_date": crash_date
    }

    results = []

    # Creating the connection to the DB
    self.connection = self.database.connection()
    cur = self.connection.cursor()

    try:
        results = db.execute(cur, sql, sql_params)
    except psycopg2.Error:
        util.reportExceptionAndContinue(logger)

    json_result = {
        "total": 0,
        "hits": []
    }
    for crash in results:
        row = dict(zip(("email", "url", "addons_checked", "duplicate_of"),
                       crash))
        json_result["hits"].append(row)
    json_result["total"] = len(json_result["hits"])

    self.connection.close()
    return json_result
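# The date slicing above is the key trick: the last six characters of a
# Socorro crash UUID encode its date as YYMMDD. The same logic isolated as a
# helper (a sketch only; the later revision of this method delegates to
# datetimeutil.uuid_to_date, whose real implementation may differ):
import datetime

def uuid_to_date(uuid):
    """Decode the YYMMDD suffix of a crash UUID into a datetime.date."""
    day = int(uuid[-2:])
    month = int(uuid[-4:-2])
    year = 2000 + int(uuid[-6:-4])  # assumes the 21st century, per the comment above
    return datetime.date(year=year, month=month, day=day)

# e.g. a UUID ending in '120229' decodes to datetime.date(2012, 2, 29)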
def getListOfTopCrashersBySignature(aCursor, databaseParameters,
        totalNumberOfCrashesForPeriodFunc=totalNumberOfCrashesForPeriod):
    """Answers a generator of top-crashers-by-signature rows. """
    databaseParameters["totalNumberOfCrashes"] = \
        totalNumberOfCrashesForPeriodFunc(aCursor, databaseParameters)
    if databaseParameters["totalNumberOfCrashes"] is None:
        return []

    assertPairs = {
        'totalNumberOfCrashes': long,
        'startDate': datetime.datetime,
        'to_date': datetime.datetime,
        'productdims_id': int,
        'limit': int
    }
    for param in assertPairs.keys():
        assert type(databaseParameters[param]) is assertPairs[param], \
            "Expected %s for %s, actual type is %s" % (
                assertPairs[param], param, type(databaseParameters[param]))

    where = []
    if databaseParameters["crash_type"] == 'browser':
        where.append("tcbs.plugin_count = 0 AND tcbs.hang_count = 0")
    if databaseParameters["crash_type"] == 'plugin':
        where.append("tcbs.plugin_count > 0 OR tcbs.hang_count > 0")
    if databaseParameters['os']:
        where.append("os.os_name ILIKE '%s%%'" % databaseParameters['os'])
    if where:
        where = "where %s" % " AND ".join(where)
    else:
        where = ""

    sql = """
        select
            tcbs.signature,
            sum(tcbs.count) as count,
            cast(sum(tcbs.count) as float) / %d as percentOfTotal,
            sum(case when os.os_name LIKE 'Windows%%' then tcbs.count else 0 end) as win_count,
            sum(case when os.os_name = 'Mac OS X' then tcbs.count else 0 end) as mac_count,
            sum(case when os.os_name = 'Linux' then tcbs.count else 0 end) as linux_count,
            sum(tcbs.hang_count) as hang_count,
            sum(tcbs.plugin_count) as plugin_count
        from
            top_crashes_by_signature tcbs
                join osdims os on tcbs.osdims_id = os.id
                    and '%s' < tcbs.window_end
                    and tcbs.window_end <= '%s'
            and tcbs.productdims_id = %d
        %s
        group by
            tcbs.signature
        order by
            2 desc
        limit %d""" % (databaseParameters["totalNumberOfCrashes"],
                       databaseParameters["startDate"],
                       databaseParameters["to_date"],
                       databaseParameters["productdims_id"],
                       where,
                       databaseParameters["limit"])
    #logger.debug(aCursor.mogrify(sql, databaseParameters))
    return db.execute(aCursor, sql)
def get(self, **kwargs):
    """Return a single crash report from its UUID. """
    filters = [
        ("uuid", None, "str"),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    if params.uuid is None:
        raise MissingOrBadArgumentException(
            "Mandatory parameter 'uuid' is missing or empty")

    crash_date = datetimeutil.uuid_to_date(params.uuid)
    logger.debug("Looking for crash %s during day %s" % (params.uuid,
                                                         crash_date))

    sql = """/* socorro.external.postgresql.crash.Crash.get */
        SELECT reports.email, reports.url, reports.addons_checked,
        ( SELECT reports_duplicates.duplicate_of
          FROM reports_duplicates
          WHERE reports_duplicates.uuid = reports.uuid
        ) as duplicate_of
        FROM reports
        WHERE reports.uuid=%(uuid)s
        AND reports.success IS NOT NULL
        AND utc_day_is( reports.date_processed, %(crash_date)s)
    """
    sql_params = {
        "uuid": params.uuid,
        "crash_date": crash_date
    }

    results = []

    # Creating the connection to the DB
    self.connection = self.database.connection()
    cur = self.connection.cursor()

    try:
        results = db.execute(cur, sql, sql_params)
    except psycopg2.Error:
        util.reportExceptionAndContinue(logger)

    json_result = {
        "total": 0,
        "hits": []
    }
    for crash in results:
        row = dict(zip(("email", "url", "addons_checked", "duplicate_of"),
                       crash))
        json_result["hits"].append(row)
    json_result["total"] = len(json_result["hits"])

    self.connection.close()
    return json_result
def testExecute(self):
    aCursor = TestMultiCursor(numCols=1, numRows=3)
    f = db.execute(aCursor, "")
    vals = [x for x in f]
    assert 3 == len(vals)
    assert 'Row 0, Column 0' == vals[0][0]
    assert 'Row 2, Column 0' == vals[-1][0]
    aCursor = TestMultiCursor(numCols=1, numRows=1)
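# TestMultiCursor is defined elsewhere in the test suite and not shown in
# this section. A minimal sketch of a compatible fake cursor, assuming
# db.execute() drives a DB-API cursor via execute()/fetchone(); the real
# test double may differ.
class FakeMultiCursor(object):
    def __init__(self, numCols=1, numRows=1):
        # Fixed grid of labelled cells, e.g. ('Row 0, Column 0',)
        self._rows = [
            tuple('Row %d, Column %d' % (r, c) for c in range(numCols))
            for r in range(numRows)
        ]
        self._iter = iter([])

    def execute(self, sql, params=None):
        self._iter = iter(self._rows)

    def fetchone(self):
        try:
            return self._iter.next()
        except StopIteration:
            return None

    def fetchall(self):
        return list(self._iter)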
def get(self, **kwargs):
    """Return a job in the job queue. """
    filters = [
        ("uuid", None, "str"),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    if not params.uuid:
        raise MissingOrBadArgumentError(
            "Mandatory parameter 'uuid' is missing or empty")

    fields = [
        "id",
        "pathname",
        "uuid",
        "owner",
        "priority",
        "queueddatetime",
        "starteddatetime",
        "completeddatetime",
        "success",
        "message"
    ]
    sql = """
        /* socorro.external.postgresql.job.Job.get */
        SELECT %s FROM jobs WHERE uuid=%%(uuid)s
    """ % ", ".join(fields)

    json_result = {
        "total": 0,
        "hits": []
    }

    connection = None
    try:
        # Creating the connection to the DB
        connection = self.database.connection()
        cur = connection.cursor()
        results = db.execute(cur, sql, params)
    except psycopg2.Error:
        logger.error("Failed retrieving jobs data from PostgreSQL",
                     exc_info=True)
    else:
        for job in results:
            row = dict(zip(fields, job))
            # Make sure all dates are turned into strings
            for i in row:
                if isinstance(row[i], datetime.datetime):
                    row[i] = datetimeutil.date_to_string(row[i])
            json_result["hits"].append(row)
        json_result["total"] = len(json_result["hits"])
    finally:
        if connection:
            connection.close()

    return json_result
def get(self, **kwargs):
    """Return a list of signature - bug id associations. """
    filters = [
        ("signatures", None, ["list", "str"]),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    if not params.signatures:
        raise MissingOrBadArgumentError(
            "Mandatory parameter 'signatures' is missing or empty")

    # Preparing variables for the SQL query
    signatures = []
    sql_params = {}
    for i, elem in enumerate(params.signatures):
        signatures.append("%%(signature%s)s" % i)
        sql_params["signature%s" % i] = elem

    sql = """/* socorro.external.postgresql.bugs.Bugs.get */
        SELECT ba.signature, bugs.id
        FROM bugs
            JOIN bug_associations AS ba ON bugs.id = ba.bug_id
        WHERE EXISTS(
            SELECT 1 FROM bug_associations
            WHERE bug_associations.bug_id = bugs.id
            AND signature IN (%s)
        )
    """ % ", ".join(signatures)
    sql = str(" ".join(sql.split()))  # better formatting of the sql string

    connection = None
    try:
        connection = self.database.connection()
        cur = connection.cursor()
        #~ logger.debug(cur.mogrify(sql, sql_params))
        results = db.execute(cur, sql, sql_params)
    except psycopg2.Error:
        logger.error("Failed retrieving extensions data from PostgreSQL",
                     exc_info=True)
    else:
        result = {
            "total": 0,
            "hits": []
        }
        for crash in results:
            row = dict(zip(("signature", "id"), crash))
            result["hits"].append(row)
        result["total"] = len(result["hits"])
        return result
    finally:
        if connection:
            connection.close()
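# The loop above builds numbered psycopg2 placeholders so that a
# variable-length IN (...) list stays fully parameterized. The same idea as
# a standalone helper (illustrative names, not part of the codebase):
def build_in_clause(values, prefix='signature'):
    """Return ('%(signature0)s, %(signature1)s, ...', params_dict)."""
    placeholders = []
    params = {}
    for i, value in enumerate(values):
        key = '%s%s' % (prefix, i)
        placeholders.append('%%(%s)s' % key)
        params[key] = value
    return ', '.join(placeholders), params

# Usage sketch:
#   in_list, sql_params = build_in_clause(['sig_a', 'sig_b'])
#   sql = "... AND signature IN (%s)" % in_list
#   results = db.execute(cur, sql, sql_params)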
def get(self, **kwargs):
    filters = [
        ("start_date", None, "datetime"),
        ("end_date", None, "datetime"),
        ("product", None, "str"),
        ("version", None, "str"),
    ]

    params = external_common.parse_arguments(filters, kwargs)
    results = []  # So we have something to return.
    query_string = """SELECT product_name,
        version_string,
        product_version_id,
        report_date,
        nightly_builds.build_date,
        days_out,
        sum(report_count) as report_count
    FROM nightly_builds
        JOIN product_versions USING ( product_version_id )
    WHERE report_date <= %(end_date)s
        AND report_date >= %(start_date)s
        AND product_name = %(product)s
        AND version_string = %(version)s
    GROUP BY product_name,
        version_string,
        product_version_id,
        report_date,
        nightly_builds.build_date,
        days_out"""

    connection = None
    try:
        connection = self.database.connection()
        cursor = connection.cursor()
        sql_results = db.execute(cursor, query_string, params)
    except psycopg2.Error:
        logger.error("Failed retrieving crashtrends data from PostgreSQL",
                     exc_info=True)
    else:
        for trend in sql_results:
            row = dict(zip(("product_name",
                            "version_string",
                            "product_version_id",
                            "report_date",
                            "build_date",
                            "days_out",
                            "report_count"), trend))
            row['report_date'] = datetimeutil.date_to_string(
                row['report_date'])
            row['build_date'] = datetimeutil.date_to_string(
                row['build_date'])
            results.append(row)
    finally:
        if connection:
            connection.close()

    results = {'crashtrends': results}
    return results
def fetchCrashHistoryDetails(self, parameters):
    if parameters.listOfOs_names and parameters.listOfOs_names != ['']:
        localOsList = [x[0:3] for x in parameters.listOfOs_names]
        osNameListPhrase = ','.join("'%s'" % x for x in localOsList)
        parameters.os_phrase = "os_short_name in (%s)" % osNameListPhrase
    else:
        parameters.os_phrase = '1=1'

    # Defined before the branches so the select list below can always
    # reference it, even when no report types were requested.
    lookup = {'crash': adu_codes.CRASH_BROWSER,
              'oopp': adu_codes.OOP_PLUGIN,
              'hang_unique': adu_codes.HANGS_NORMALIZED,
              'hang_browser': adu_codes.HANG_BROWSER,
              'hang_plugin': adu_codes.HANG_PLUGIN,
              }
    if parameters.listOfReport_types and parameters.listOfReport_types != ['']:
        reportTypeListPhrase = ','.join(
            "'%s'" % lookup[x] for x in parameters.listOfReport_types)
        parameters.report_types_phrase = (
            "report_type in (%s)" % reportTypeListPhrase)
    else:
        parameters.report_types_phrase = '1=1'

    columnSql = "SUM(CASE WHEN report_type = '%s' THEN count ELSE 0 END) as %s"
    parameters.selectListPhrase = ', '.join(
        columnSql % (lookup[x], x) for x in parameters.listOfReport_types)
    logger.debug("created phrase %s" % parameters.selectListPhrase)

    sql = """
        SELECT adu_day, os_short_name, %(selectListPhrase)s
        FROM daily_crashes
        WHERE timestamp without time zone %%(start_date)s < adu_day
        AND adu_day <= timestamp without time zone %%(end_date)s
        AND productdims_id = %%(productdims_id)s
        AND %(os_phrase)s
        AND %(report_types_phrase)s
        GROUP BY adu_day, os_short_name
        order by 1, 2""" % parameters
    logger.debug('%s', self.connection.cursor().mogrify(
        sql.encode(self.connection.encoding), parameters))
    db_results = db.execute(self.connection.cursor(), sql, parameters)

    # Build {(date, os): {'date': ..., 'os': ..., <report_type>: count, ...}}
    # so callers can do a quick lookup by (date, os).
    column_names = ('date', 'os') + tuple(parameters.listOfReport_types)
    structure = [dict(zip(column_names, x)) for x in db_results]
    structures = dict(((x['date'], x['os']), x) for x in structure)
    logger.info("structures = %s", structures)
    return structures
def _get_builds_for_product(self, product):
    cursor = self.connection.cursor(
        cursor_factory=psycopg2.extras.RealDictCursor)
    result = db.execute(cursor, """
        SELECT product_name as product,
               version,
               build_id,
               build_type,
               platform,
               repository
        FROM releases_raw
        WHERE product_name = %(product)s
    """, {"product": product})
    return list(result)
def _get_builds_for_version(self, version):
    cursor = self.connection.cursor(
        cursor_factory=psycopg2.extras.RealDictCursor)
    result = db.execute(cursor, """
        SELECT product_name as product,
               version,
               build_id,
               build_type,
               platform,
               repository
        FROM releases_raw
        WHERE version = %(version)s
    """, {"version": version})
    return list(result)
def get(self, **kwargs):
    """Return a list of extensions associated with a crash's UUID."""
    filters = [
        ("uuid", None, "str"),
        ("date", None, "datetime"),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    sql = """/* socorro.external.postgresql.extensions.Extensions.get */
        SELECT extensions.*
        FROM extensions
            INNER JOIN reports ON extensions.report_id = reports.id
        WHERE reports.uuid = %(uuid)s
        AND reports.date_processed = %(crash_date)s
        AND extensions.date_processed = %(crash_date)s
    """
    sql_params = {
        "uuid": params.uuid,
        "crash_date": params.date
    }

    result = {
        "total": 0,
        "hits": []
    }

    connection = None
    try:
        connection = self.database.connection()
        cur = connection.cursor()
        results = db.execute(cur, sql, sql_params)
    except psycopg2.Error:
        logger.error("Failed retrieving extensions data from PostgreSQL",
                     exc_info=True)
    else:
        for crash in results:
            row = dict(zip(("report_id", "date_processed", "extension_key",
                            "extension_id", "extension_version"), crash))
            row["date_processed"] = datetimeutil.date_to_string(
                row["date_processed"])
            result["hits"].append(row)
        result["total"] = len(result["hits"])
    finally:
        if connection:
            connection.close()

    return result
def post(self, **kwargs):
    """Return a list of signature - bug id associations. """
    params = external_common.parse_arguments(self.filters, kwargs)

    if not params.signatures:
        raise MissingOrBadArgumentError(
            "Mandatory parameter 'signatures' is missing or empty")

    # Preparing variables for the SQL query
    signatures = []
    sql_params = {}
    for i, elem in enumerate(params.signatures):
        signatures.append("%%(signature%s)s" % i)
        sql_params["signature%s" % i] = elem

    sql = """/* socorro.external.postgresql.bugs.Bugs.get */
        SELECT ba.signature, bugs.id
        FROM bugs
            JOIN bug_associations AS ba ON bugs.id = ba.bug_id
        WHERE EXISTS(
            SELECT 1 FROM bug_associations
            WHERE bug_associations.bug_id = bugs.id
            AND signature IN (%s)
        )
    """ % ", ".join(signatures)
    sql = str(" ".join(sql.split()))  # better formatting of the sql string

    connection = None
    try:
        connection = self.database.connection()
        cur = connection.cursor()
        #~ logger.debug(cur.mogrify(sql, sql_params))
        results = db.execute(cur, sql, sql_params)
    except psycopg2.Error:
        logger.error("Failed retrieving extensions data from PostgreSQL",
                     exc_info=True)
    else:
        result = {"total": 0, "hits": []}
        for crash in results:
            row = dict(zip(("signature", "id"), crash))
            result["hits"].append(row)
        result["total"] = len(result["hits"])
        return result
    finally:
        if connection:
            connection.close()
def fetchTotalsForRange(self, params):
    sql = """
        select
            CAST(ceil(EXTRACT(EPOCH FROM (window_end - %(startDate)s)) / %(stepSize)s) AS INT) as bucket_number,
            sum(count)
        from
            top_crashes_by_signature tcbs
        where
            %(startDate)s < window_end
            and window_end <= %(endDate)s
            and productdims_id = %(productdims_id)s
        group by
            bucket_number
        order by
            bucket_number"""
    return db.execute(self.connection.cursor(), sql, params)
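# The bucket_number expression above assigns each window_end to a 1-based
# bucket of stepSize seconds counted from startDate. The same arithmetic in
# plain Python, as an illustration of the SQL (not code from the codebase):
import math

def bucket_number(window_end, start_date, step_size_seconds):
    """Mirror CAST(ceil(EXTRACT(EPOCH FROM (window_end - start)) / step) AS INT)."""
    delta_seconds = (window_end - start_date).total_seconds()
    return int(math.ceil(delta_seconds / step_size_seconds))

# With hourly buckets (stepSize = 3600), a window ending 90 minutes
# (5400 seconds) after startDate lands in bucket ceil(5400 / 3600.0) == 2.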
def get_versions_info(self, cur, product_version_list):
    """Return version info for a flat list alternating products and versions. """
    if not product_version_list:
        return None

    versions = []
    products = []
    for x in xrange(0, len(product_version_list), 2):
        products.append(product_version_list[x])
        versions.append(product_version_list[x + 1])

    params = {}
    params = PostgresAPI.dispatch_params(params, "product", products)
    params = PostgresAPI.dispatch_params(params, "version", versions)

    where = []
    for i in xrange(len(products)):
        index = str(i)
        where.append(index.join(("(pi.product_name = %(product",
                                 ")s AND pi.version_string = %(version",
                                 ")s)")))

    sql = """/* socorro.search.postgresql.PostgresAPI.get_product_info */
        SELECT pi.version_string, which_table, major_version, pi.product_name
        FROM product_info pi
            JOIN product_versions pv ON
                (pv.product_version_id = pi.product_version_id)
        WHERE %s
    """ % " OR ".join(where)

    try:
        results = db.execute(cur, sql, params)
    except Exception:
        results = []
        util.reportExceptionAndContinue(logger)

    res = {}
    for line in results:
        row = dict(zip(("version_string", "which_table", "major_version",
                        "product_name"), line))
        res[":".join((row["product_name"], row["version_string"]))] = row

    return res
def __init__(self, context):
    super(ProductVersionCache, self).__init__()
    self.config = context
    self.cache = {}
    sql = """
        select
            product_name as product,
            version_string as version,
            product_version_id as id
        from product_info
    """
    self.database = db.Database(self.config)
    connection = self.database.connection()
    cursor = connection.cursor()
    for product, version, id in db.execute(cursor, sql):
        self.cache[(product, version)] = id
    connection.close()
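# Usage sketch for the cache built above: lookups are plain dict access
# keyed on (product, version). The helper name below is illustrative, not
# part of the codebase.
def lookup_product_version_id(cache, product, version):
    """Return the cached product_version_id, or None if the pair is unknown."""
    return cache.cache.get((product, version))

# e.g. lookup_product_version_id(cache, 'Firefox', '10.0') -> an integer id,
# or None when that product/version pair was not in product_info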
def fetchSigHistory(self, parameters):
    if parameters['signature'] == '##null##':
        signatureCriterionPhrase = ' and signature is null'
    else:
        signatureCriterionPhrase = ' and signature = %(signature)s'
    if parameters['signature'] == '##empty##':
        parameters['signature'] = ''
    sql = """
        WITH hist as (
            select
                report_date,
                report_count
            from
                tcbs join signatures using (signature_id)
                    join product_versions using (product_version_id)
            where
                report_date between %%(startDate)s and %%(endDate)s
                and product_name = %%(product)s
                and version_string = %%(version)s
                %s
            group by
                report_date, report_count
            order by 1),
        scaling_window AS (
            select
                hist.*,
                sum(report_count) over () as total_crashes
            from hist
        )
        SELECT
            report_date,
            report_count,
            report_count / total_crashes::float as percent_of_total
        from scaling_window
        order by report_date DESC
    """ % signatureCriterionPhrase
    #logger.debug('%s', self.connection.cursor().mogrify(sql, parameters))
    return db.execute(self.connection.cursor(), sql, parameters)
def fetchSignatureHistory(self, params):
    if params['signature'] == '##null##':
        signatureCriterionPhrase = ' and signature is null'
    else:
        signatureCriterionPhrase = ' and signature = %(signature)s'
    if params['signature'] == '##empty##':
        params['signature'] = ''
    sql = """
        select
            CAST(ceil(EXTRACT(EPOCH FROM (window_end - %%(startDate)s)) / %%(stepSize)s) AS INT) as bucket_number,
            sum(count)
        from
            top_crashes_by_signature tcbs
        where
            %%(startDate)s < window_end
            and window_end <= %%(endDate)s
            and productdims_id = %%(productdims_id)s
            %s
        group by
            bucket_number
        order by 1""" % signatureCriterionPhrase
    #logger.debug('%s', self.connection.cursor().mogrify(sql, params))
    return dict((bucket, count) for bucket, count
                in db.execute(self.connection.cursor(), sql, params))
def getListOfTopCrashersBySignature(connection, dbParams):
    """
    Answers a generator of tcbs rows
    """
    assertPairs = {
        'startDate': (datetime.date, datetime.datetime),
        'to_date': (datetime.date, datetime.datetime),
        'product': basestring,
        'version': basestring,
        'limit': int
    }

    for param in assertPairs:
        if not isinstance(dbParams[param], assertPairs[param]):
            raise BadArgumentError(type(dbParams[param]))

    order_by = 'report_count'  # default order field
    where = ['']  # trick for the later join
    if dbParams['crash_type'] != 'all':
        where.append("process_type = %s" %
                     (sqlutils.quote_value(dbParams['crash_type']),))
    if dbParams['os']:
        abbreviated_os = dbParams['os'][0:3].lower()
        if abbreviated_os not in ('win', 'lin', 'mac'):
            # this check prevents possible SQL injections
            raise BadArgumentError('Invalid OS to order on')
        order_by = '%s_count' % abbreviated_os
        where.append("%s > 0" % order_by)
    where = ' AND '.join(where)

    table_to_use = 'tcbs'
    date_range_field = 'report_date'
    if dbParams['date_range_type'] == 'build':
        table_to_use = 'tcbs_build'
        date_range_field = 'build_date'

    sql = """
        WITH tcbs_r as (
        SELECT tcbs.signature_id,
                signature,
                pv.product_name,
                version_string,
                sum(report_count) as report_count,
                sum(win_count) as win_count,
                sum(lin_count) as lin_count,
                sum(mac_count) as mac_count,
                sum(hang_count) as hang_count,
                plugin_count(process_type,report_count) as plugin_count,
                content_count(process_type,report_count) as content_count,
                first_report,
                version_list,
                sum(startup_count) as startup_count,
                sum(is_gc_count) as is_gc_count
        FROM %s tcbs
        JOIN signatures USING (signature_id)
        JOIN product_versions AS pv USING (product_version_id)
        JOIN signature_products_rollup AS spr
            ON spr.signature_id = tcbs.signature_id
            AND spr.product_name = pv.product_name
        WHERE pv.product_name = %%s
            AND version_string = %%s
            AND tcbs.%s BETWEEN %%s AND %%s
            %s
        GROUP BY tcbs.signature_id, signature, pv.product_name,
                 version_string, first_report, spr.version_list
        ),
        tcbs_window AS (
            SELECT tcbs_r.*,
            sum(report_count) over () as total_crashes,
            dense_rank() over (order by report_count desc) as ranking
        FROM tcbs_r
        )
        SELECT signature,
                report_count,
                win_count,
                lin_count,
                mac_count,
                hang_count,
                plugin_count,
                content_count,
                first_report,
                version_list,
                %s / total_crashes::float as percent_of_total,
                startup_count / %s::float as startup_percent,
                is_gc_count,
                total_crashes::int
        FROM tcbs_window
        ORDER BY %s DESC
        LIMIT %s
    """ % (table_to_use, date_range_field, where,
           order_by, order_by, order_by, dbParams["limit"])

    params = (
        dbParams['product'],
        dbParams['version'],
        dbParams['startDate'],
        dbParams['to_date'],
    )
    try:
        cursor = connection.cursor()
        result = db.execute(cursor, sql, params)
    except Exception:
        connection.rollback()
        raise
    else:
        # Commit on success; rollback and re-raise on failure.
        connection.commit()
        return result
def get_list(self, **kwargs):
    """
    List all crashes with a given signature and return them.

    Optional arguments: see SearchCommon.get_parameters()
    """
    # Creating the connection to the DB
    self.connection = self.database.connection()
    cur = self.connection.cursor()

    params = search_common.get_parameters(kwargs)

    if params["signature"] is None:
        return None

    params["terms"] = params["signature"]
    params["search_mode"] = "is_exactly"

    # Default mode falls back to starts_with for postgres
    if params["plugin_search_mode"] == "default":
        params["plugin_search_mode"] = "starts_with"

    # Limiting to a signature
    if params["terms"]:
        params["terms"] = self.prepare_terms(params["terms"],
                                             params["search_mode"])

    # Searching for terms in plugins
    if params["report_process"] == "plugin" and params["plugin_terms"]:
        params["plugin_terms"] = " ".join(params["plugin_terms"])
        params["plugin_terms"] = self.prepare_terms(
            params["plugin_terms"], params["plugin_search_mode"])

    # Get information about the versions
    util_service = Util(config=self.context)
    params["versions_info"] = util_service.versions_info(**params)

    # Parsing the versions
    params["versions_string"] = params["versions"]
    (params["versions"], params["products"]) = self.parse_versions(
        params["versions"], params["products"])

    # Changing the OS ids to OS names
    for i, elem in enumerate(params["os"]):
        for platform in self.context.platforms:
            if platform["id"] == elem:
                params["os"][i] = platform["name"]

    # Creating the parameters for the sql query
    sql_params = {}

    # Preparing the different parts of the sql query
    sql_select = """
        SELECT r.date_processed, r.uptime, r.user_comments, r.uuid,
               r.product, r.version, r.build, r.signature, r.url,
               r.os_name, r.os_version, r.cpu_name, r.cpu_info, r.address,
               r.reason, r.last_crash, r.install_age, r.hangid,
               r.process_type,
               (r.client_crash_date - (r.install_age * INTERVAL '1 second'))
                   AS install_time,
               rd.duplicate_of
    """

    sql_from = self.build_reports_sql_from(params)
    sql_from = """%s
        LEFT OUTER JOIN reports_duplicates rd ON r.uuid = rd.uuid
    """ % sql_from

    (sql_where, sql_params) = self.build_reports_sql_where(params,
                                                           sql_params,
                                                           self.context)

    sql_order = """
        ORDER BY r.date_processed DESC
    """

    (sql_limit, sql_params) = self.build_reports_sql_limit(params,
                                                           sql_params)

    # Assembling the query
    sql_query = " ".join((
        "/* socorro.external.postgresql.report.Report.list */",
        sql_select, sql_from, sql_where, sql_order, sql_limit))

    # Query for counting the results
    sql_count_query = " ".join((
        "/* socorro.external.postgresql.report.Report.list */",
        "SELECT count(*)", sql_from, sql_where))

    # Debug
    logger.debug(sql_count_query)
    logger.debug(cur.mogrify(sql_count_query, sql_params))

    # Querying the DB
    try:
        total = db.singleValueSql(cur, sql_count_query, sql_params)
    except db.SQLDidNotReturnSingleValue:
        total = 0
        util.reportExceptionAndContinue(logger)

    results = []

    # No need to call Postgres if we know there will be no results
    if total != 0:
        try:
            results = db.execute(cur, sql_query, sql_params)
        except psycopg2.Error:
            util.reportExceptionAndContinue(logger)

    json_result = {
        "total": total,
        "hits": []
    }

    # Transforming the results into what we want
    for crash in results:
        row = dict(zip((
            "date_processed", "uptime", "user_comments", "uuid", "product",
            "version", "build", "signature", "url", "os_name", "os_version",
            "cpu_name", "cpu_info", "address", "reason", "last_crash",
            "install_age", "hangid", "process_type", "install_time",
            "duplicate_of"), crash))
        for i in row:
            if isinstance(row[i], datetime.datetime):
                row[i] = str(row[i])
        json_result["hits"].append(row)

    self.connection.close()
    return json_result
def get_comments(self, **kwargs):
    """Return a list of comments on crash reports, filtered by
    signatures and other fields.

    See socorro.lib.search_common.get_parameters() for all filters.
    """
    # Creating the connection to the DB
    self.connection = self.database.connection()
    cur = self.connection.cursor()

    params = self.prepare_search_params(**kwargs)

    # Creating the parameters for the sql query
    sql_params = {}

    # Preparing the different parts of the sql query
    # WARNING: sensitive data is returned here (email). When there is
    # an authentication mechanism, a verification should be done here.
    sql_select = """
        SELECT r.date_processed, r.user_comments, r.uuid,
        CASE
            WHEN r.email = '' THEN null
            WHEN r.email IS NULL THEN null
            ELSE r.email
        END
    """

    sql_from = self.build_reports_sql_from(params)

    (sql_where, sql_params) = self.build_reports_sql_where(params,
                                                           sql_params,
                                                           self.context)
    sql_where = "%s AND r.user_comments IS NOT NULL" % sql_where

    sql_order = "ORDER BY email ASC, r.date_processed ASC"

    # Assembling the query
    sql_query = " ".join((
        "/* external.postgresql.crashes.Crashes.get_comments */",
        sql_select, sql_from, sql_where, sql_order))

    # Query for counting the results
    sql_count_query = " ".join((
        "/* external.postgresql.crashes.Crashes.get_comments */",
        "SELECT count(*)", sql_from, sql_where))

    # Querying the DB
    try:
        total = db.singleValueSql(cur, sql_count_query, sql_params)
    except db.SQLDidNotReturnSingleValue:
        total = 0
        util.reportExceptionAndContinue(logger)

    results = []

    # No need to call Postgres if we know there will be no results
    if total != 0:
        try:
            results = db.execute(cur, sql_query, sql_params)
        except psycopg2.Error:
            util.reportExceptionAndContinue(logger)

    result = {
        "total": total,
        "hits": []
    }

    # Transforming the results into what we want
    for crash in results:
        row = dict(zip(("date_processed", "user_comments", "uuid", "email"),
                       crash))
        for i in row:
            if isinstance(row[i], datetime.datetime):
                row[i] = str(row[i])
        result["hits"].append(row)

    self.connection.close()
    return result
def getListOfTopCrashersBySignature(aCursor, dbParams):
    """
    Answers a generator of tcbs rows
    """
    assertPairs = {
        'startDate': (datetime.date, datetime.datetime),
        'to_date': (datetime.date, datetime.datetime),
        'product': basestring,
        'version': basestring,
        'limit': int
    }

    for param in assertPairs:
        if not isinstance(dbParams[param], assertPairs[param]):
            raise ValueError(type(dbParams[param]))

    order_by = 'report_count'  # default order field
    where = ['']  # trick for the later join
    if dbParams['crash_type'] != 'all':
        where.append("process_type = '%s'" % (dbParams['crash_type'],))
    if dbParams['os']:
        order_by = '%s_count' % dbParams['os'][0:3].lower()
        where.append("%s > 0" % order_by)
    where = ' AND '.join(where)

    # NOTE: the values below are interpolated directly into the SQL string,
    # so they must come from a trusted source; the parameterized variants of
    # this function pass them to the driver instead.
    sql = """
        WITH tcbs_r as (
        SELECT tcbs.signature_id,
                signature,
                pv.product_name,
                version_string,
                sum(report_count) as report_count,
                sum(win_count) as win_count,
                sum(lin_count) as lin_count,
                sum(mac_count) as mac_count,
                sum(hang_count) as hang_count,
                plugin_count(process_type,report_count) as plugin_count,
                content_count(process_type,report_count) as content_count,
                first_report,
                version_list,
                sum(startup_count) as startup_count
        FROM tcbs
        JOIN signatures USING (signature_id)
        JOIN product_versions AS pv USING (product_version_id)
        JOIN signature_products_rollup AS spr
            ON spr.signature_id = tcbs.signature_id
            AND spr.product_name = pv.product_name
        WHERE pv.product_name = '%s'
            AND version_string = '%s'
            AND report_date BETWEEN '%s' AND '%s'
            %s
        GROUP BY tcbs.signature_id, signature, pv.product_name,
                 version_string, first_report, spr.version_list
        ),
        tcbs_window AS (
            SELECT tcbs_r.*,
            sum(report_count) over () as total_crashes,
            dense_rank() over (order by report_count desc) as ranking
        FROM tcbs_r
        )
        SELECT signature,
                report_count,
                win_count,
                lin_count,
                mac_count,
                hang_count,
                plugin_count,
                content_count,
                first_report,
                version_list,
                %s / total_crashes::float as percent_of_total,
                startup_count / %s::float as startup_percent
        FROM tcbs_window
        ORDER BY %s DESC
        LIMIT %s
    """ % (dbParams["product"], dbParams["version"], dbParams["startDate"],
           dbParams["to_date"], where, order_by, order_by, order_by,
           dbParams["limit"])
    #logger.debug(aCursor.mogrify(sql, dbParams))
    return db.execute(aCursor, sql)
def versions_info(self, **kwargs):
    """
    Return information about versions of a product.

    See http://socorro.readthedocs.org/en/latest/middleware.html

    Keyword arguments:
    versions - List of products and versions.

    Return:
    None if versions is null or empty;
    Otherwise a dictionary of data about a version, i.e.:
    {
        "product_name:version_string": {
            "product_version_id": integer,
            "version_string": "string",
            "product_name": "string",
            "major_version": "string" or None,
            "release_channel": "string" or None,
            "build_id": [list, of, decimals] or None
        }
    }
    """
    # Parse arguments
    filters = [("versions", None, ["list", "str"])]
    params = external_common.parse_arguments(filters, kwargs)

    if "versions" not in params or not params["versions"]:
        return None

    products_list = []
    (versions_list, products_list) = Util.parse_versions(params["versions"],
                                                         products_list)

    if not versions_list:
        return None

    versions = []
    products = []
    for x in xrange(0, len(versions_list), 2):
        products.append(versions_list[x])
        versions.append(versions_list[x + 1])

    params = {}
    params = Util.dispatch_params(params, "product", products)
    params = Util.dispatch_params(params, "version", versions)

    where = []
    for i in range(len(products)):
        where.append(str(i).join(("(pi.product_name = %(product",
                                  ")s AND pi.version_string = %(version",
                                  ")s)")))

    sql = """/* socorro.external.postgresql.util.Util.versions_info */
        SELECT pv.product_version_id, pi.version_string, pi.product_name,
               which_table, pv.release_version, pv.build_type, pvb.build_id
        FROM product_info pi
            LEFT JOIN product_versions pv ON
                (pv.product_version_id = pi.product_version_id)
            JOIN product_version_builds pvb ON
                (pv.product_version_id = pvb.product_version_id)
        WHERE %s
        ORDER BY pv.version_sort
    """ % " OR ".join(where)

    # Creating the connection to the DB
    self.connection = self.database.connection()
    cur = self.connection.cursor()

    try:
        results = db.execute(cur, sql, params)
    except Exception:
        results = []
        util.reportExceptionAndContinue(logger)

    res = {}
    for line in results:
        row = dict(zip(("product_version_id", "version_string",
                        "product_name", "which_table", "major_version",
                        "release_channel", "build_id"), line))
        key = ":".join((row["product_name"], row["version_string"]))

        if key in res:
            # That key already exists, just add the new build id to it
            res[key]["build_id"].append(int(row["build_id"]))
        else:
            if row["which_table"] == "old":
                row["release_channel"] = row["build_id"] = None
            del row["which_table"]

            if row["build_id"]:
                row["build_id"] = [int(row["build_id"])]

            res[key] = row

    return res
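# parse_versions returns a flat list alternating product and version; the
# xrange loop above splits it into parallel lists. Equivalent slicing, shown
# as a standalone sketch (illustrative helper, not part of the codebase):
def pair_products_versions(flat_list):
    """['p0', 'v0', 'p1', 'v1'] -> (['p0', 'p1'], ['v0', 'v1'])"""
    return flat_list[0::2], flat_list[1::2]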
def get(self, **kwargs):
    """ Return urls for signature """
    filters = [
        ("signature", None, "str"),
        ("start_date", None, "datetime"),
        ("end_date", None, "datetime"),
        ("products", None, ["list", "str"]),
        ("versions", None, ["list", "str"]),
    ]

    params = external_common.parse_arguments(filters, kwargs)

    # Because no parameters are optional, loop through them all to ensure
    # each has been set and is not None.
    missingParams = []
    for param in params:
        if not params[param]:
            missingParams.append(param)

    if len(missingParams) > 0:
        raise MissingOrBadArgumentError(
            "Mandatory parameter(s) '%s' is missing or empty" %
            ", ".join(missingParams))

    all_products_versions_sql = """
        /* socorro.external.postgresql.signature_urls.SignatureURLs.get */
        SELECT url, count(*) as crash_count
        FROM reports_clean
        JOIN reports_user_info USING ( UUID )
        JOIN signatures USING ( signature_id )
        WHERE reports_clean.date_processed BETWEEN
            %(start_date)s AND %(end_date)s
        AND reports_user_info.date_processed BETWEEN
            %(start_date)s AND %(end_date)s
        AND signature = %(signature)s
        AND url <> ''
    """

    sql = """
        /* socorro.external.postgresql.signature_urls.SignatureURLs.get */
        SELECT url, count(*) as crash_count
        FROM reports_clean
        JOIN reports_user_info USING ( UUID )
        JOIN signatures USING ( signature_id )
        JOIN product_versions USING ( product_version_id )
        WHERE reports_clean.date_processed BETWEEN
            %(start_date)s AND %(end_date)s
        AND reports_user_info.date_processed BETWEEN
            %(start_date)s AND %(end_date)s
        AND signature = %(signature)s
        AND url <> ''
        AND (
    """

    sql_group_order = """
        GROUP BY url
        ORDER BY crash_count DESC
        LIMIT 100"""

    sql_params = {
        "start_date": params.start_date,
        "end_date": params.end_date,
        "signature": params.signature
    }

    # If this query is for all products, the 'ALL' keyword will be the only
    # item in the products list, and the query is then also for all versions.
    if 'ALL' in params['products']:
        sql_query = " ".join((all_products_versions_sql, sql_group_order))
    # If this query is for all versions, the 'ALL' keyword will be the only
    # item in the versions list.
    elif 'ALL' in params['versions']:
        sql_products = " product_name IN ('%s') )" % "', '".join(
            params.products)
        sql_date_range_limit = """AND '%s' BETWEEN
            product_versions.build_date AND
            product_versions.sunset_date""" % params.end_date
        sql_query = " ".join((sql, sql_products, sql_date_range_limit,
                              sql_group_order))
    else:
        products = []
        (params["products_versions"],
         products) = self.parse_versions(params["versions"], [])

        versions_list = []
        products_list = []
        for x in range(0, len(params["products_versions"]), 2):
            products_list.append(params["products_versions"][x])
            versions_list.append(params["products_versions"][x + 1])

        product_version_list = []
        for prod in params["products"]:
            versions = []
            for i, product in enumerate(products_list):
                if product == prod:
                    versions.append(versions_list[i])
            product_version_list.append(tuple(versions))

        sql_product_version_ids = [
            """( product_name = %%(product%s)s
                 AND version_string IN %%(version%s)s ) """ % (x, x)
            for x in range(len(product_version_list))]

        sql_params = add_param_to_dict(sql_params, "version",
                                       product_version_list)
        sql_params = add_param_to_dict(sql_params, "product",
                                       params.products)

        sql_query = " ".join((sql, " OR ".join(sql_product_version_ids),
                              " ) " + sql_group_order))

    json_result = {"total": 0, "hits": []}

    connection = None
    try:
        connection = self.database.connection()
        cur = connection.cursor()
        results = db.execute(cur, sql_query, sql_params)
    except psycopg2.Error:
        logger.error(
            "Failed retrieving urls for signature data from PostgreSQL",
            exc_info=True)
    else:
        for url in results:
            row = dict(zip(("url", "crash_count"), url))
            json_result["hits"].append(row)
        json_result["total"] = len(json_result["hits"])
        return json_result
    finally:
        if connection:
            connection.close()
def get(self, **kwargs):
    filters = [
        ("report_type", None, "str"),
        ("signature", None, "str"),
        ("start_date", None, "datetime"),
        ("end_date", None, "datetime"),
        ("versions", None, ["list", "str"]),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    products = []
    versions = []

    # Get information about the versions
    util_service = Util(config=self.context)
    versions_info = util_service.versions_info(**params)
    if versions_info:
        for i, elem in enumerate(versions_info):
            products.append(versions_info[elem]["product_name"])
            versions.append(str(versions_info[elem]["product_version_id"]))

    params['versions'] = versions
    params['product'] = products

    if params['versions'] and params['report_type'] != 'products':
        glue = ','
        version_search = ' AND reports_clean.product_version_id IN (%s)'
        version_search = version_search % glue.join(params['versions'])
    else:
        version_search = ''

    if params['product'] and params['report_type'] != 'products':
        product_list = ' AND product_name IN %s'
    else:
        product_list = ''

    query_params = report_type_sql.get(params['report_type'], {})
    if (params['report_type'] != 'products'
            and 'first_col' not in query_params):
        raise Exception('Invalid report type')

    self.connection = self.database.connection()
    cursor = self.connection.cursor()

    if params['report_type'] == 'products':
        result_cols = ['product_name',
                       'version_string',
                       'report_count',
                       'percentage']
        query_string = """WITH counts AS (
            SELECT product_version_id, product_name, version_string,
                count(*) AS report_count
            FROM reports_clean
                JOIN product_versions USING (product_version_id)
            WHERE
                signature_id = (SELECT signature_id FROM signatures
                                WHERE signature = %s)
                AND date_processed >= %s
                AND date_processed < %s
            GROUP BY product_version_id, product_name, version_string
        ),
        totals as (
            SELECT product_version_id, product_name, version_string,
                report_count,
                sum(report_count) OVER () as total_count
            FROM counts
        )
        SELECT product_name, version_string,
            report_count::INT,
            round((report_count * 100::numeric)/total_count,3)::TEXT
                as percentage
        FROM totals
        ORDER BY report_count DESC"""
        query_parameters = (params['signature'],
                            params['start_date'],
                            params['end_date'])
    else:
        result_cols = ['category', 'report_count', 'percentage']
        query_string = ["""WITH counts AS (
            SELECT """]
        query_string.append(query_params['first_col'])
        query_string.append(""" as category, count(*) AS report_count
            FROM reports_clean
                JOIN product_versions USING (product_version_id)
        """)
        query_string.append(query_params.get('extra_join', ''))
        query_string.append("""
            WHERE
                signature_id = (SELECT signature_id FROM signatures
                                WHERE signature = %s)
                AND date_processed >= %s
                AND date_processed < %s
        """)
        query_string.append(product_list)
        query_string.append(version_search)
        query_string.append(""" GROUP BY """)
        query_string.append(query_params['first_col'])
        query_string.append("""),
        totals as (
            SELECT category, report_count,
                sum(report_count) OVER () as total_count
            FROM counts
        )
        SELECT """)
        query_string.append(query_params['first_col_format'])
        query_string.append(""",
            report_count::INT,
            round((report_count::numeric)/total_count,5)::TEXT as percentage
        FROM totals
        ORDER BY report_count DESC""")
        query_string = " ".join(query_string)

        query_parameters = [params['signature'],
                            params['start_date'],
                            params['end_date']]

        if product_list:
            # This MUST be a tuple otherwise it gets cast to an array.
            query_parameters.append(tuple(params['product']))

        query_parameters = tuple(query_parameters)

    sql_results = db.execute(cursor, query_string, query_parameters)
    results = []
    for row in sql_results:
        newrow = dict(zip(result_cols, row))
        results.append(newrow)

    return results
def get_frequency(self, **kwargs):
    """Return the number and frequency of crashes on each OS.

    See socorro.lib.search_common.get_parameters() for all filters.
    """
    params = self.prepare_search_params(**kwargs)

    # Creating the parameters for the sql query
    sql_params = {
        "signature": params.signature
    }

    # Preparing the different parts of the sql query
    sql_select = ["""
        SELECT r.build AS build_date,
               COUNT(CASE WHEN (r.signature = %(signature)s) THEN 1 END)
                   AS count,
               CAST(COUNT(CASE WHEN (r.signature = %(signature)s) THEN 1 END)
                   AS FLOAT(10)) / count(r.id) AS frequency,
               COUNT(r.id) AS total
    """]

    ## Adding count for each OS
    for i in self.context.platforms:
        sql_select.append("""
            COUNT(CASE WHEN (r.signature = %%(signature)s
                  AND r.os_name = '%s') THEN 1 END) AS count_%s
        """ % (i["name"], i["id"]))
        sql_select.append("""
            CASE WHEN (COUNT(CASE WHEN (r.os_name = '%s') THEN 1 END) > 0)
            THEN (CAST(COUNT(CASE WHEN (r.signature = '%s'
                  AND r.os_name = '%s') THEN 1 END) AS FLOAT(10)) /
                  COUNT(CASE WHEN (r.os_name = '%s') THEN 1 END))
            ELSE 0.0
            END AS frequency_%s
        """ % (i["name"], params.signature, i["name"], i["name"], i["id"]))

    sql_select = ", ".join(sql_select)

    sql_from = self.build_reports_sql_from(params)
    (sql_where, sql_params) = self.build_reports_sql_where(params,
                                                           sql_params,
                                                           self.context)
    sql_group = "GROUP BY r.build"
    sql_order = "ORDER BY r.build DESC"

    # Assembling the query
    sql = " ".join((
        "/* external.postgresql.crashes.Crashes.get_frequency */",
        sql_select, sql_from, sql_where, sql_group, sql_order))
    sql = str(" ".join(sql.split()))  # better formatting of the sql string

    result = {
        "total": 0,
        "hits": []
    }

    connection = None
    try:
        connection = self.database.connection()
        cur = connection.cursor()
        logger.debug(cur.mogrify(sql, sql_params))
        results = db.execute(cur, sql, sql_params)
    except psycopg2.Error:
        logger.error("Failed retrieving extensions data from PostgreSQL",
                     exc_info=True)
    else:
        fields = ["build_date", "count", "frequency", "total"]
        for i in self.context.platforms:
            fields.append("count_%s" % i["id"])
            fields.append("frequency_%s" % i["id"])

        for crash in results:
            row = dict(zip(fields, crash))
            result["hits"].append(row)
        result["total"] = len(result["hits"])
    finally:
        if connection:
            connection.close()

    return result
def get_paireduuid(self, **kwargs): """Return paired uuid given a uuid and an optional hangid. If a hangid is passed, then return only one result. Otherwise, return all found paired uuids. """ filters = [ ("uuid", None, "str"), ("hangid", None, "str"), ] params = external_common.parse_arguments(filters, kwargs) if not params.uuid: raise MissingOrBadArgumentException( "Mandatory parameter 'uuid' is missing or empty") crash_date = datetimeutil.uuid_to_date(params.uuid) sql = """ /* socorro.external.postgresql.crashes.Crashes.get_paireduuid */ SELECT uuid FROM reports r WHERE r.uuid != %(uuid)s AND r.date_processed BETWEEN TIMESTAMP %(crash_date)s - CAST('1 day' AS INTERVAL) AND TIMESTAMP %(crash_date)s + CAST('1 day' AS INTERVAL) """ sql_params = { "uuid": params.uuid, "crash_date": crash_date } if params.hangid is not None: sql = """%s AND r.hangid = %%(hangid)s LIMIT 1 """ % sql sql_params["hangid"] = params.hangid else: sql = """%s AND r.hangid IN ( SELECT hangid FROM reports r2 WHERE r2.date_processed BETWEEN TIMESTAMP %%(crash_date)s - CAST('1 day' AS INTERVAL) AND TIMESTAMP %%(crash_date)s + CAST('1 day' AS INTERVAL) AND r2.uuid = %%(uuid)s ) """ % sql result = { "total": 0, "hits": [] } connection = None try: connection = self.database.connection() cur = connection.cursor() results = db.execute(cur, sql, sql_params) # Transforming the results into what we want for report in results: row = dict(zip(("uuid",), report)) result["hits"].append(row) result["total"] = len(result["hits"]) except psycopg2.Error: logger.error("Failed to retrieve paired uuids from database", exc_info=True) finally: # guard against database.connection() having raised before the name was bound if connection: connection.close() return result
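datetimeutil.uuid_to_date can recover a date from the crash id alone because Socorro crash ids embed the submission date in their trailing characters. A minimal sketch, assuming (this is an assumption about the id scheme) that the last six digits encode the date as YYMMDD:

import datetime

def uuid_to_date_sketch(uuid):
    # Assumed encoding: an id ending in '120304' maps to 2012-03-04.
    return datetime.datetime(2000 + int(uuid[-6:-4]),  # year
                             int(uuid[-4:-2]),         # month
                             int(uuid[-2:]))           # day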
def getListOfTopCrashersBySignature(connection, dbParams): """ Answers a generator of tcbs rows """ assertPairs = { 'startDate': (datetime.date, datetime.datetime), 'to_date': (datetime.date, datetime.datetime), 'product': basestring, 'version': basestring, 'limit': int } for param in assertPairs: if not isinstance(dbParams[param], assertPairs[param]): raise ValueError(type(dbParams[param])) order_by = 'report_count' # default order field where = [''] # trick for the later join if dbParams['crash_type'] != 'all': where.append("process_type = '%s'" % (dbParams['crash_type'],)) if dbParams['os']: order_by = '%s_count' % dbParams['os'][0:3].lower() where.append("%s > 0" % order_by) where = ' AND '.join(where) table_to_use = 'tcbs' date_range_field = 'report_date' if dbParams['date_range_type'] == 'build': table_to_use = 'tcbs_build' date_range_field = 'build_date' sql = """ WITH tcbs_r as ( SELECT tcbs.signature_id, signature, pv.product_name, version_string, sum(report_count) as report_count, sum(win_count) as win_count, sum(lin_count) as lin_count, sum(mac_count) as mac_count, sum(hang_count) as hang_count, plugin_count(process_type,report_count) as plugin_count, content_count(process_type,report_count) as content_count, first_report, version_list, sum(startup_count) as startup_count, sum(is_gc_count) as is_gc_count FROM %s tcbs JOIN signatures USING (signature_id) JOIN product_versions AS pv USING (product_version_id) JOIN signature_products_rollup AS spr ON spr.signature_id = tcbs.signature_id AND spr.product_name = pv.product_name WHERE pv.product_name = %%s AND version_string = %%s AND tcbs.%s BETWEEN %%s AND %%s %s GROUP BY tcbs.signature_id, signature, pv.product_name, version_string, first_report, spr.version_list ), tcbs_window AS ( SELECT tcbs_r.*, sum(report_count) over () as total_crashes, dense_rank() over (order by report_count desc) as ranking FROM tcbs_r ) SELECT signature, report_count, win_count, lin_count, mac_count, hang_count, plugin_count, content_count, first_report, version_list, %s / total_crashes::float as percent_of_total, startup_count / %s::float as startup_percent, is_gc_count, total_crashes::int FROM tcbs_window ORDER BY %s DESC LIMIT %s """ % ( table_to_use, date_range_field, where, order_by, order_by, order_by, dbParams["limit"] ) cursor = connection.cursor() params = ( dbParams['product'], dbParams['version'], dbParams['startDate'], dbParams['to_date'], ) # This is a read-only query: on failure, roll back and re-raise; on success there is nothing to commit (an 'else: connection.commit()' clause after a 'return' in the try suite would never run anyway) try: return db.execute(cursor, sql, params) except Exception: connection.rollback() raise
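A hedged usage sketch for the function above; 'connection' is an open psycopg2 connection and all values are placeholders:

import datetime

dbParams = {
    'startDate': datetime.date(2012, 3, 1),
    'to_date': datetime.date(2012, 3, 8),
    'product': 'Firefox',
    'version': '11.0',
    'limit': 100,
    'crash_type': 'all',          # anything else becomes a process_type filter
    'os': None,                   # e.g. 'Windows' ranks by win_count instead
    'date_range_type': 'report',  # 'build' switches to tcbs_build/build_date
}
for row in getListOfTopCrashersBySignature(connection, dbParams):
    signature, report_count = row[0], row[1]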
def builds(self, **kwargs): """ Return information about nightly builds of one or several products. See http://socorro.readthedocs.org/en/latest/middleware.html#builds Keyword arguments: product - Concerned product version - Concerned version from_date - Retrieve builds from this date to now Return: [ { "product": "string", "version": "string", "platform": "string", "buildid": "integer", "build_type": "string", "beta_number": "string", "repository": "string", "date": "string" }, ... ] """ # Default value for from_date lastweek = utc_now() - timedelta(7) # Parse arguments filters = [ ("product", None, "str"), ("version", None, "str"), ("from_date", lastweek, "datetime"), ] params = external_common.parse_arguments(filters, kwargs) self._require_parameters(params, "product") # FIXME this will be moved to the DB in 7, see bug 740829 if params["product"].startswith("Fennec"): params["release_name"] = "mobile" else: params["release_name"] = params["product"] params["from_date"] = params["from_date"].date() sql = [ """/* socorro.external.postgresql.builds.Builds.builds */ SELECT version, platform, build_id as buildid, build_type, beta_number, repository, build_date(build_id) as date FROM releases_raw WHERE product_name = %(release_name)s """ ] if params["version"]: sql.append("AND version = %(version)s") sql.append(""" AND build_date(build_id) >= timestamp with time zone %(from_date)s AND repository IN ('mozilla-central', 'mozilla-1.9.2', 'comm-central', 'comm-1.9.2', 'comm-central-trunk', 'mozilla-central-android') ORDER BY build_date(build_id) DESC, product_name ASC, version ASC, platform ASC """) sql_query = " ".join(sql) # Creating the connection to the DB self.connection = self.database.connection() cur = self.connection.cursor() try: logger.debug(cur.mogrify(sql_query, params)) sql_results = db.execute(cur, sql_query, params) except Exception: sql_results = [] util.reportExceptionAndContinue(logger) results = [ dict( zip(("version", "platform", "buildid", "build_type", "beta_number", "repository", "date"), line)) for line in sql_results ] for i, line in enumerate(results): results[i]["product"] = params["product"] results[i]["buildid"] = int(line["buildid"]) results[i]["date"] = line["date"].strftime("%Y-%m-%d") return results
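build_date() above is a database-side function; Mozilla build ids are timestamps of the form YYYYMMDDhhmmss, so the first eight digits carry the date. A sketch of the assumed equivalent in Python (the function name is hypothetical, for illustration only):

import datetime

def build_date_sketch(build_id):
    # Assumed: build ids look like 20120315042012 (YYYYMMDDhhmmss).
    s = str(build_id)
    return datetime.date(int(s[0:4]), int(s[4:6]), int(s[6:8]))

# build_date_sketch(20120315042012) -> datetime.date(2012, 3, 15)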
def versions_info(self, **kwargs): """ Return information about versions of a product. See http://socorro.readthedocs.org/en/latest/middleware.html Keyword arguments: versions - List of products and versions. Return: None if versions is null or empty ; Otherwise a dictionary of data about a version, i.e.: { "product_name:version_string": { "product_version_id": integer, "version_string": "string", "product_name": "string", "major_version": "string" or None, "release_channel": "string" or None, "build_id": [list, of, decimals] or None } } """ # Parse arguments filters = [ ("versions", None, ["list", "str"]) ] params = external_common.parse_arguments(filters, kwargs) if "versions" not in params or not params["versions"]: return None products_list = [] (versions_list, products_list) = Util.parse_versions( params["versions"], products_list) if not versions_list: return None versions = [] products = [] for x in xrange(0, len(versions_list), 2): products.append(versions_list[x]) versions.append(versions_list[x + 1]) params = {} params = Util.dispatch_params(params, "product", products) params = Util.dispatch_params(params, "version", versions) where = [] for i in range(len(products)): where.append(str(i).join(("(pi.product_name = %(product", ")s AND pi.version_string = %(version", ")s)"))) sql = """/* socorro.external.postgresql.util.Util.versions_info */ SELECT pv.product_version_id, pi.version_string, pi.product_name, which_table, pv.release_version, pv.build_type, pvb.build_id FROM product_info pi LEFT JOIN product_versions pv ON (pv.product_version_id = pi.product_version_id) JOIN product_version_builds pvb ON (pv.product_version_id = pvb.product_version_id) WHERE %s ORDER BY pv.version_sort """ % " OR ".join(where) # Creating the connection to the DB self.connection = self.database.connection() cur = self.connection.cursor() try: results = db.execute(cur, sql, params) except Exception: results = [] util.reportExceptionAndContinue(logger) res = {} for line in results: row = dict(zip(("product_version_id", "version_string", "product_name", "which_table", "major_version", "release_channel", "build_id"), line)) key = ":".join((row["product_name"], row["version_string"])) if key in res: # That key already exists, just add it the new buildid res[key]["build_id"].append(int(row["build_id"])) else: if row["which_table"] == "old": row["release_channel"] = row["build_id"] = None del row["which_table"] if row["build_id"]: row["build_id"] = [int(row["build_id"])] res[key] = row return res
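The flat versions list alternates product and version, which the xrange loop above unzips before Util.dispatch_params numbers the values into the SQL parameter dict. For example:

versions_list = ["Firefox", "11.0", "Firefox", "12.0a2"]
products = versions_list[0::2]   # ['Firefox', 'Firefox']
versions = versions_list[1::2]   # ['11.0', '12.0a2']
# dispatch_params then presumably yields keys matching the generated
# placeholders: {'product0': 'Firefox', 'version0': '11.0',
#                'product1': 'Firefox', 'version1': '12.0a2'}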
def search(self, **kwargs): """ Search for crashes and return them. See http://socorro.readthedocs.org/en/latest/middleware.html#search Optional arguments: see SearchCommon.get_parameters() """ # Creating the connection to the DB self.connection = self.database.connection() cur = self.connection.cursor() params = search_common.get_parameters(kwargs) # Default mode falls back to starts_with for postgres if params["search_mode"] == "default": params["search_mode"] = "starts_with" if params["plugin_search_mode"] == "default": params["plugin_search_mode"] = "starts_with" # For Postgres, we never search for a list of terms if params["terms"]: params["terms"] = " ".join(params["terms"]) params["terms"] = Search.prepare_terms(params["terms"], params["search_mode"]) # Searching for terms in plugins if params["report_process"] == "plugin" and params["plugin_terms"]: params["plugin_terms"] = " ".join(params["plugin_terms"]) params["plugin_terms"] = Search.prepare_terms( params["plugin_terms"], params["plugin_search_mode"]) # Get information about the versions util_service = Util(config=self.context) params["versions_info"] = util_service.versions_info(**params) # Parsing the versions params["versions_string"] = params["versions"] (params["versions"], params["products"]) = Search.parse_versions(params["versions"], params["products"]) # Changing the OS ids to OS names for i, elem in enumerate(params["os"]): for platform in self.context.platforms: if platform["id"] == elem: params["os"][i] = platform["name"] # Creating the parameters for the sql query sql_params = {} # Preparing the different parts of the sql query sql_select = self.generate_sql_select(params) # Adding count for each OS for i in self.context.platforms: sql_params["os_%s" % i["id"]] = i["name"] sql_from = self.build_reports_sql_from(params) (sql_where, sql_params) = self.build_reports_sql_where(params, sql_params, self.context) sql_group = self.generate_sql_group(params) sql_order = """ ORDER BY total DESC, signature """ (sql_limit, sql_params) = self.build_reports_sql_limit(params, sql_params) # Assembling the query sql_query = " ".join( ("/* socorro.search.Search search */", sql_select, sql_from, sql_where, sql_group, sql_order, sql_limit)) # Query for counting the results sql_count_query = " ".join( ("/* socorro.external.postgresql.search.Search search.count */", "SELECT count(DISTINCT r.signature)", sql_from, sql_where)) # Debug logger.debug(cur.mogrify(sql_query, sql_params)) # Querying the DB try: total = db.singleValueSql(cur, sql_count_query, sql_params) except db.SQLDidNotReturnSingleValue: total = 0 util.reportExceptionAndContinue(logger) results = [] # No need to call Postgres if we know there will be no results if total != 0: try: results = db.execute(cur, sql_query, sql_params) except psycopg2.Error: util.reportExceptionAndContinue(logger) json_result = {"total": total, "hits": []} # Transforming the results into what we want for crash in results: if params["report_process"] == "plugin": row = dict( zip(("signature", "count", "is_windows", "is_mac", "is_linux", "numhang", "numplugin", "numcontent", "pluginname", "pluginversion", "pluginfilename"), crash)) else: row = dict( zip(("signature", "count", "is_windows", "is_mac", "is_linux", "numhang", "numplugin", "numcontent"), crash)) json_result["hits"].append(row) self.connection.close() return json_result
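Search.prepare_terms presumably reshapes the bound value to match the SQL operator implied by the search mode (a LIKE pattern for starts_with or contains). A sketch of the assumed behaviour, not the actual implementation:

def prepare_terms_sketch(terms, search_mode):
    if search_mode == "contains":
        return "%" + terms + "%"
    if search_mode == "starts_with":
        return terms + "%"
    # "is_exactly" presumably binds the term unchanged
    return terms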
def _get_versions(self, params): """ Return product information for one or more product:version combinations """ products = [] (params["products_versions"], products) = self.parse_versions(params["versions"], []) sql_select = """ SELECT product_name as product, version_string as version, start_date, end_date, is_featured, build_type, throttle::float, has_builds FROM product_info """ sql_where = [] versions_list = [] products_list = [] for x in range(0, len(params["products_versions"]), 2): products_list.append(params["products_versions"][x]) versions_list.append(params["products_versions"][x + 1]) sql_where = [ "(product_name = %(product" + str(x) + ")s AND version_string = %(version" + str(x) + ")s)" for x in range(len(products_list)) ] sql_params = {} sql_params = add_param_to_dict(sql_params, "product", products_list) sql_params = add_param_to_dict(sql_params, "version", versions_list) if len(sql_where) > 0: sql_query = " WHERE ".join((sql_select, " OR ".join(sql_where))) else: sql_query = sql_select sql_query = """ /* socorro.external.postgresql.Products.get_versions */ %s """ % sql_query json_result = {"total": 0, "hits": []} try: connection = self.database.connection() cur = connection.cursor() results = db.execute(cur, sql_query, sql_params) except psycopg2.Error: logger.error( "Failed retrieving products_versions data from PostgreSQL", exc_info=True) else: for product in results: row = dict( zip(("product", "version", "start_date", "end_date", "is_featured", "build_type", "throttle", "has_builds"), product)) json_result["hits"].append(row) row["start_date"] = datetimeutil.date_to_string( row["start_date"]) row["end_date"] = datetimeutil.date_to_string(row["end_date"]) json_result["total"] = len(json_result["hits"]) return json_result finally: connection.close()
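add_param_to_dict pairs each value with a numbered key so the dict lines up with the %(product0)s-style placeholders built just above. A minimal sketch of the assumed helper:

def add_param_to_dict_sketch(params, prefix, values):
    # ['Firefox', 'Fennec'] with prefix 'product' becomes
    # {'product0': 'Firefox', 'product1': 'Fennec'}
    for i, value in enumerate(values):
        params["%s%s" % (prefix, i)] = value
    return params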
def getListOfTopCrashersBySignature(aCursor, dbParams): """ Answers a generator of tcbs rows """ assertPairs = { 'startDate': (datetime.date, datetime.datetime), 'to_date': (datetime.date, datetime.datetime), 'product': basestring, 'version': basestring, 'limit': int } for param in assertPairs: if not isinstance(dbParams[param], assertPairs[param]): raise ValueError(type(dbParams[param])) order_by = 'report_count' # default order field where = [''] # trick for the later join if dbParams['crash_type'] != 'all': where.append("process_type = '%s'" % (dbParams['crash_type'], )) if dbParams['os']: order_by = '%s_count' % dbParams['os'][0:3].lower() where.append("%s > 0" % order_by) where = ' AND '.join(where) table_to_use = 'tcbs' date_range_field = 'report_date' if dbParams['date_range_type'] == 'build': table_to_use = 'tcbs_build' date_range_field = 'build_date' # bind user-supplied values (product, version, dates) as query parameters instead of quoting them into the SQL text sql = """ WITH tcbs_r as ( SELECT tcbs.signature_id, signature, pv.product_name, version_string, sum(report_count) as report_count, sum(win_count) as win_count, sum(lin_count) as lin_count, sum(mac_count) as mac_count, sum(hang_count) as hang_count, plugin_count(process_type,report_count) as plugin_count, content_count(process_type,report_count) as content_count, first_report, version_list, sum(startup_count) as startup_count FROM %s tcbs JOIN signatures USING (signature_id) JOIN product_versions AS pv USING (product_version_id) JOIN signature_products_rollup AS spr ON spr.signature_id = tcbs.signature_id AND spr.product_name = pv.product_name WHERE pv.product_name = %%s AND version_string = %%s AND tcbs.%s BETWEEN %%s AND %%s %s GROUP BY tcbs.signature_id, signature, pv.product_name, version_string, first_report, spr.version_list ), tcbs_window AS ( SELECT tcbs_r.*, sum(report_count) over () as total_crashes, dense_rank() over (order by report_count desc) as ranking FROM tcbs_r ) SELECT signature, report_count, win_count, lin_count, mac_count, hang_count, plugin_count, content_count, first_report, version_list, %s / total_crashes::float as percent_of_total, startup_count / %s::float as startup_percent FROM tcbs_window ORDER BY %s DESC LIMIT %s """ % (table_to_use, date_range_field, where, order_by, order_by, order_by, dbParams["limit"]) params = (dbParams['product'], dbParams['version'], dbParams['startDate'], dbParams['to_date']) #logger.debug(aCursor.mogrify(sql, params)) return db.execute(aCursor, sql, params)
def get(self, **kwargs): """ Search for crashes and return them. See http://socorro.readthedocs.org/en/latest/middleware.html#search Optional arguments: see SearchCommon.get_parameters() """ # Creating the connection to the DB self.connection = self.database.connection() cur = self.connection.cursor() params = search_common.get_parameters(kwargs) # change aliases from the web to the implementation's need if "for" in params and "terms" not in params: params["terms"] = params.get("for") if "from" in params and "from_date" not in params: params["from_date"] = params.get("from") if "to" in params and "to_date" not in params: params["to_date"] = params.get("to") if "in" in params and "fields" not in params: params["fields"] = params.get("in") # Default mode falls back to starts_with for postgres if params["search_mode"] == "default": params["search_mode"] = "starts_with" if params["plugin_search_mode"] == "default": params["plugin_search_mode"] = "starts_with" # For Postgres, we never search for a list of terms if params["terms"]: params["terms"] = " ".join(params["terms"]) params["terms"] = Search.prepare_terms(params["terms"], params["search_mode"]) # Searching for terms in plugins if params["report_process"] == "plugin" and params["plugin_terms"]: params["plugin_terms"] = " ".join(params["plugin_terms"]) params["plugin_terms"] = Search.prepare_terms( params["plugin_terms"], params["plugin_search_mode"]) # Get information about the versions util_service = Util(config=self.context) params["versions_info"] = util_service.versions_info(**params) # Parsing the versions params["versions_string"] = params["versions"] (params["versions"], params["products"]) = Search.parse_versions( params["versions"], params["products"]) if hasattr(self.context, 'webapi'): context = self.context.webapi else: # old middleware context = self.context # Changing the OS ids to OS names for i, elem in enumerate(params["os"]): for platform in context.platforms: if platform["id"] == elem: params["os"][i] = platform["name"] # Creating the parameters for the sql query sql_params = { } # Preparing the different parts of the sql query sql_select = self.generate_sql_select(params) # Adding count for each OS for i in context.platforms: sql_params["os_%s" % i["id"]] = i["name"] sql_from = self.build_reports_sql_from(params) (sql_where, sql_params) = self.build_reports_sql_where(params, sql_params, self.context) sql_group = self.generate_sql_group(params) sql_order = """ ORDER BY total DESC, signature """ (sql_limit, sql_params) = self.build_reports_sql_limit(params, sql_params) # Assembling the query sql_query = " ".join(("/* socorro.search.Search search */", sql_select, sql_from, sql_where, sql_group, sql_order, sql_limit)) # Query for counting the results sql_count_query = " ".join(( "/* socorro.external.postgresql.search.Search search.count */", "SELECT count(DISTINCT r.signature)", sql_from, sql_where)) # Debug logger.debug(cur.mogrify(sql_query, sql_params)) # Querying the DB try: total = db.singleValueSql(cur, sql_count_query, sql_params) except db.SQLDidNotReturnSingleValue: total = 0 util.reportExceptionAndContinue(logger) results = [] # No need to call Postgres if we know there will be no results if total != 0: try: results = db.execute(cur, sql_query, sql_params) except psycopg2.Error: util.reportExceptionAndContinue(logger) json_result = { "total": total, "hits": [] } # Transforming the results into what we want for crash in results: if params["report_process"] == "plugin": row = dict(zip(("signature", "count", 
"is_windows", "is_mac", "is_linux", "numhang", "numplugin", "numcontent", "pluginname", "pluginversion", "pluginfilename"), crash)) else: row = dict(zip(("signature", "count", "is_windows", "is_mac", "is_linux", "numhang", "numplugin", "numcontent"), crash)) json_result["hits"].append(row) self.connection.close() return json_result
def get(self, **kwargs): filters = [ ("report_type", None, "str"), ("signature", None, "str"), ("start_date", None, "datetime"), ("end_date", None, "datetime"), ("versions", None, ["list", "str"]), ] params = external_common.parse_arguments(filters, kwargs) products = [] versions = [] # Get information about the versions util_service = Util(config=self.context) versions_info = util_service.versions_info(**params) if versions_info: for i, elem in enumerate(versions_info): products.append(versions_info[elem]["product_name"]) versions.append(str(versions_info[elem]["version_string"])) # This MUST be a tuple otherwise it gets cast to an array params['product'] = tuple(products) params['version'] = tuple(versions) if params['product'] and params['report_type'] != 'products': product_list = ' AND product_name IN %s ' else: product_list = '' if params['version'] and params['report_type'] != 'products': version_list = ' AND version_string IN %s ' else: version_list = '' query_params = report_type_sql.get(params['report_type'], {}) if (params['report_type'] not in ('products', 'distinct_install', 'exploitability', 'devices', 'graphics') and 'first_col' not in query_params): raise BadArgumentError('report type') self.connection = self.database.connection() cursor = self.connection.cursor() if params['report_type'] == 'products': result_cols = ['product_name', 'version_string', 'report_count', 'percentage'] query_string = """ WITH crashes as ( SELECT product_name as category , version_string , SUM(report_count) as report_count FROM signature_summary_products JOIN signatures USING (signature_id) WHERE signatures.signature = %s AND report_date >= %s AND report_date < %s GROUP BY product_name, version_string ), totals as ( SELECT category , version_string , report_count , SUM(report_count) OVER () as total_count FROM crashes ) SELECT category , version_string , report_count , round((report_count * 100::numeric)/total_count,3)::TEXT as percentage FROM totals ORDER BY report_count DESC""" query_parameters = (params['signature'], params['start_date'], params['end_date']) elif params['report_type'] == 'distinct_install': result_cols = ['product_name', 'version_string', 'crashes', 'installations'] query_string = """ SELECT product_name , version_string , SUM(crash_count) AS crashes , SUM(install_count) AS installations FROM signature_summary_installations JOIN signatures USING (signature_id) WHERE signatures.signature = %s AND report_date >= %s AND report_date < %s """ query_string += product_list query_string += version_list query_string += """ GROUP BY product_name, version_string ORDER BY crashes DESC """ query_parameters = ( params['signature'], params['start_date'], params['end_date'] ) if product_list: query_parameters += (params['product'],) if version_list: query_parameters += (params['version'],) elif params['report_type'] == 'exploitability': # Note: even if params['product'] is set, we can't use it in this query result_cols = [ 'report_date', 'null_count', 'none_count', 'low_count', 'medium_count', 'high_count', ] query_string = """ SELECT cast(report_date as TEXT), SUM(null_count), SUM(none_count), SUM(low_count), SUM(medium_count), SUM(high_count) FROM exploitability_reports JOIN signatures USING (signature_id) WHERE signatures.signature = %s AND report_date >= %s AND report_date < %s """ query_string += product_list query_string += version_list query_string += """ GROUP BY report_date ORDER BY report_date DESC """ query_parameters = ( params['signature'], params['start_date'], 
params['end_date'], ) if product_list: query_parameters += (params['product'],) if version_list: query_parameters += (params['version'],) elif params['report_type'] == 'devices': result_cols = [ 'cpu_abi', 'manufacturer', 'model', 'version', 'report_count', 'percentage', ] query_string = """ WITH crashes as ( SELECT android_devices.android_cpu_abi as cpu_abi, android_devices.android_manufacturer as manufacturer, android_devices.android_model as model, android_devices.android_version as version, SUM(report_count) as report_count FROM signature_summary_device JOIN signatures USING (signature_id) JOIN android_devices ON signature_summary_device.android_device_id = android_devices.android_device_id WHERE signatures.signature = %s AND report_date >= %s AND report_date < %s """ query_string += product_list query_string += version_list query_string += """ GROUP BY android_devices.android_cpu_abi, android_devices.android_manufacturer, android_devices.android_model, android_devices.android_version ), totals as ( SELECT cpu_abi, manufacturer, model, version, report_count, SUM(report_count) OVER () as total_count FROM crashes ) SELECT cpu_abi, manufacturer, model, version, report_count, round((report_count * 100::numeric)/total_count,3)::TEXT as percentage FROM totals ORDER BY report_count DESC """ query_parameters = ( params['signature'], params['start_date'], params['end_date'], ) if product_list: query_parameters += (params['product'],) if version_list: query_parameters += (params['version'],) elif params['report_type'] == 'graphics': result_cols = [ 'vendor_hex', 'adapter_hex', 'vendor_name', 'adapter_name', 'report_count', 'percentage', ] query_string = """ WITH crashes as ( SELECT graphics_device.vendor_hex as vendor_hex, graphics_device.adapter_hex as adapter_hex, graphics_device.vendor_name as vendor_name, graphics_device.adapter_name as adapter_name, SUM(report_count) as report_count FROM signature_summary_graphics JOIN signatures USING (signature_id) JOIN graphics_device ON signature_summary_graphics.graphics_device_id = graphics_device.graphics_device_id WHERE signatures.signature = %s AND report_date >= %s AND report_date < %s """ query_string += product_list query_string += version_list query_string += """ GROUP BY graphics_device.graphics_device_id ), totals as ( SELECT vendor_hex, adapter_hex, vendor_name, adapter_name, report_count, SUM(report_count) OVER () as total_count FROM crashes ) SELECT vendor_hex, adapter_hex, vendor_name, adapter_name, report_count, round((report_count * 100::numeric)/total_count,3)::TEXT as percentage FROM totals ORDER BY report_count DESC """ query_parameters = ( params['signature'], params['start_date'], params['end_date'], ) if product_list: query_parameters += (params['product'],) if version_list: query_parameters += (params['version'],) elif params['report_type'] in report_type_columns: result_cols = ['category', 'report_count', 'percentage'] query_string = """ WITH crashes AS ( SELECT """ query_string += report_type_columns[params['report_type']] query_string += """ AS category , sum(report_count) AS report_count FROM signature_summary_""" query_string += params['report_type'] query_string += """ JOIN signatures USING (signature_id) WHERE signatures.signature = %s AND report_date >= %s AND report_date < %s """ query_string += product_list query_string += version_list query_string += """ GROUP BY category ), totals AS ( SELECT category , report_count , sum(report_count) OVER () as total_count FROM crashes ) SELECT category , report_count , 
round((report_count * 100::numeric)/total_count,3)::TEXT as percentage FROM totals ORDER BY report_count DESC """ query_parameters = ( params['signature'], params['start_date'], params['end_date'] ) if product_list: query_parameters += (params['product'],) if version_list: query_parameters += (params['version'],) sql_results = db.execute(cursor, query_string, query_parameters) results = [] for row in sql_results: newrow = dict(zip(result_cols, row)) results.append(newrow) # Closing the connection here because we're not using # the parent class' query() self.connection.close() return results
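Every branch above shares the same query shape: a crashes CTE aggregates report counts per category, a totals CTE attaches a grand total via a window function, and the outer SELECT derives each category's percentage. The pattern in isolation, with a hypothetical rollup table:

pattern = """
    WITH crashes AS (
        SELECT os_name AS category, SUM(report_count) AS report_count
        FROM some_rollup_table
        GROUP BY os_name
    ), totals AS (
        SELECT category, report_count,
               SUM(report_count) OVER () AS total_count
        FROM crashes
    )
    SELECT category, report_count,
           round((report_count * 100::numeric) / total_count, 3) AS percentage
    FROM totals
    ORDER BY report_count DESC
"""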
def export(path, numberofdays=0): """Export crash reports from a PostgreSQL database. path - Directory where the dump file will be created. numberofdays - Number of days of crash reports to retrieve, before the most recent crash date. """ database = db.Database(config) connection = database.connection() cur = connection.cursor() crash_files = [] fields_list = ("client_crash_date", "date_processed", "uuid", "product", "version", "build", "signature", "url", "install_age", "last_crash", "uptime", "cpu_name", "cpu_info", "reason", "address", "os_name", "os_version", "email", "build_date", "user_id", "started_datetime", "completed_datetime", "success", "truncated", "processor_notes", "user_comments", "app_notes", "distributor", "distributor_version", "topmost_filenames", "addons_checked", "flash_version", "hangid", "process_type", "release_channel") # steps # 1. pull all distinct dates sql = """ SELECT DISTINCT to_char(date_processed, 'YYYY-MM-DD') as day FROM reports ORDER BY day DESC """ if numberofdays: sql = "%s LIMIT %s" % (sql, numberofdays) print 'Calculating dates... ' days = db.execute(cur, sql) days_list = [] for day in days: days_list.append(day[0]) #~ days_list = [ #~ '2012-03-04T00:00:00+00:00' #~ ] store_filename = 'dump.json' store_filename = os.path.normpath('%s/%s' % (path, store_filename)) store = open(store_filename, 'w') print 'Store file created: %s' % store_filename indexes_filename = 'indexes.txt' indexes_filename = os.path.normpath('%s/%s' % (path, indexes_filename)) indexes = open(indexes_filename, 'w') print 'Indexes file created: %s' % indexes_filename for day in days_list: date_from = dtu.datetimeFromISOdateString(day) date_to = date_from + datetime.timedelta(1) datestr = date_from.strftime('%y%m%d') es_index = 'socorro_%s' % datestr es_type = 'crash_reports' action_line = '{"index":{"_index":"%s","_type":"%s"}}\n' % ( es_index, es_type) indexes.write('%s\n' % es_index) # 2. for each date, pull all crashes of the day day_sql = " ".join(("SELECT %s" % ", ".join(fields_list), "FROM reports", "WHERE date_processed BETWEEN %s AND %s")) print 'Getting crash reports for day %s' % date_from.date() crashes_list = db.execute(cur, day_sql, (date_from, date_to)) for crash in crashes_list: # 3. for each crash report json_crash = dict(zip(fields_list, crash)) # stringify datetime fields for i in json_crash: if isinstance(json_crash[i], datetime.datetime): json_crash[i] = dtu.date_to_string(json_crash[i]) store.write(action_line) store.write('%s\n' % json.dumps(json_crash)) store.close() crash_files.append(store_filename) indexes.close() crash_files.append(indexes_filename) connection.close() return generate_dump(crash_files, path)
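The dump file interleaves one action line and one source line per crash, which is the format the Elasticsearch bulk API consumes. For a hypothetical crash processed on 2012-03-04, dump.json would contain a pair like the following (the real source line carries every field in fields_list; this one is trimmed for illustration):

{"index":{"_index":"socorro_120304","_type":"crash_reports"}}
{"uuid": "00000000-0000-0000-0000-000000120304", "product": "Firefox", "version": "11.0", "date_processed": "2012-03-04T10:20:30+00:00"}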