def test_build_query_from_params(self):
    """Verify the shape of ElasticSearchBase.build_query_from_params() output.

    Checks the default query skeleton, then a terms/fields/search_mode
    combination that must produce a wildcard query wrapped in a filtered
    query.
    """
    # Test with all default parameters
    config = self.get_dummy_context()
    params = {}
    params = scommon.get_parameters(params)
    query = ElasticSearchBase.build_query_from_params(params, config)
    self.assertTrue(query)
    # assertIn/assertEqual give much clearer failure messages than
    # assertTrue("key" in container)
    self.assertIn("query", query)
    self.assertIn("size", query)
    self.assertIn("from", query)

    # Searching for a term in a specific field and with a specific product
    params = {
        "terms": "hang",
        "fields": "dump",
        "search_mode": "contains",
        "products": "fennec"
    }
    params = scommon.get_parameters(params)
    query = ElasticSearchBase.build_query_from_params(params, config)
    self.assertTrue(query)
    self.assertIn("query", query)
    self.assertIn("filtered", query["query"])

    filtered = query["query"]["filtered"]
    self.assertIn("query", filtered)
    self.assertIn("wildcard", filtered["query"])
    self.assertIn("dump", filtered["query"]["wildcard"])

    # "contains" mode must wrap the term in wildcards
    dump_term = filtered["query"]["wildcard"]["dump"]
    self.assertEqual(dump_term, "*hang*")

    self.assertIn("filter", filtered)
    self.assertIn("and", filtered["filter"])
def test_get_parameters(self): """ Test search_common.get_parameters() """ # Empty params, only default values are returned params = co.get_parameters({}) self.assertTrue(params) for i in params: typei = type(params[i]) if i in ("from_date", "to_date", "build_from", "build_to"): self.assertTrue(typei is datetime) else: self.assertTrue(not params[i] or typei is int or typei is str or typei is list) # Empty params params = co.get_parameters({ "terms": "", "fields": "", "products": "", "from_date": "", "to_date": "", "versions": "", "reasons": "", "release_channels": "", "os": "", "search_mode": "", "build_ids": "", "report_process": "", "report_type": "", "plugin_in": "", "plugin_search_mode": "", "plugin_terms": "" }) assert params, "SearchCommon.get_parameters() returned something " \ "empty or null." for i in params: typei = type(params[i]) if i in ("from_date", "to_date", "build_from", "build_to"): self.assertTrue(typei is datetime) else: self.assertTrue(not params[i] or typei is int or typei is str or typei is list) # Test with encoded slashes in terms and signature params = co.get_parameters({ "terms": ["some", "terms/sig"], "signature": "my/little/signature" }) self.assertTrue("signature" in params) self.assertTrue("terms" in params) self.assertEqual(params["terms"], ["some", "terms/sig"]) self.assertEqual(params["signature"], "my/little/signature")
def test_get_parameters(self):
    """
    Exercise search_common.get_parameters() with no input, with every
    argument given as an empty string, and with slash-containing values.
    """

    def _check_value_types(result):
        # Date-like entries must be datetimes; any other entry is either
        # empty or a plain int/str/list.
        for key, value in result.items():
            if key in ("from_date", "to_date", "build_from", "build_to"):
                assert type(value) is datetime.datetime
            else:
                assert not value or type(value) in (int, str, list)

    # No input at all: only default values come back
    defaults = get_parameters({})
    assert defaults
    _check_value_types(defaults)

    # Every known argument passed as an empty string
    emptied = get_parameters({
        "terms": "",
        "fields": "",
        "products": "",
        "from_date": "",
        "to_date": "",
        "versions": "",
        "reasons": "",
        "release_channels": "",
        "os": "",
        "search_mode": "",
        "build_ids": "",
        "report_process": "",
        "report_type": "",
        "plugin_in": "",
        "plugin_search_mode": "",
        "plugin_terms": "",
    })
    assert emptied, "SearchCommon.get_parameters() returned something empty or null."
    _check_value_types(emptied)

    # Slashes in terms and signature must survive untouched
    sliced = get_parameters({
        "terms": ["some", "terms/sig"],
        "signature": "my/little/signature"
    })
    assert "signature" in sliced
    assert "terms" in sliced
    assert sliced["terms"] == ["some", "terms/sig"]
    assert sliced["signature"] == "my/little/signature"
def test_get_parameters():
    """
    Test SearchCommon.get_parameters()

    The type-checking loop was duplicated verbatim for both calls; it is
    now a local helper so the two assertions cannot drift apart.
    """

    def _assert_default_types(params):
        # Date-like parameters must be datetime objects; everything else
        # must be empty or a plain int/str/list.
        for i in params:
            typei = type(params[i])
            if i in ("from_date", "to_date", "build_from", "build_to"):
                assert typei is datetime, (
                    "The parameter %s is of a non expected type %s, "
                    "should be datetime" % (i, typei))
            else:
                assert (not params[i] or typei is int or typei is str or
                        typei is list), (
                    "The parameter %s is of a non expected type %s" %
                    (i, typei))

    # Empty params, only default values are returned
    params = co.get_parameters({})
    assert params, (
        "SearchCommon.get_parameters() returned something empty or null.")
    _assert_default_types(params)

    # Empty params: every known argument given as an empty string must
    # still yield well-typed defaults.
    params = co.get_parameters({
        "terms": "",
        "fields": "",
        "products": "",
        "from_date": "",
        "to_date": "",
        "versions": "",
        "reasons": "",
        "os": "",
        "branches": "",
        "search_mode": "",
        "build_ids": "",
        "report_process": "",
        "report_type": "",
        "plugin_in": "",
        "plugin_search_mode": "",
        "plugin_terms": ""
    })
    assert params, (
        "SearchCommon.get_parameters() returned something empty or null.")
    _assert_default_types(params)
def test_get_parameters(self):
    """
    Test search_common.get_parameters() against defaults, all-empty
    arguments, and values containing encoded slashes.
    """
    date_keys = ("from_date", "to_date", "build_from", "build_to")

    # Empty input: only default values are returned
    params = get_parameters({})
    assert params
    for name in params:
        kind = type(params[name])
        if name in date_keys:
            assert kind is datetime.datetime
            continue
        assert not params[name] or kind in (int, str, list)

    # Every known argument passed as an empty string
    params = get_parameters({
        "terms": "",
        "fields": "",
        "products": "",
        "from_date": "",
        "to_date": "",
        "versions": "",
        "reasons": "",
        "release_channels": "",
        "os": "",
        "search_mode": "",
        "build_ids": "",
        "report_process": "",
        "report_type": "",
        "plugin_in": "",
        "plugin_search_mode": "",
        "plugin_terms": ""
    })
    assert params, "SearchCommon.get_parameters() returned something empty or null."
    for name in params:
        kind = type(params[name])
        if name in date_keys:
            assert kind is datetime.datetime
            continue
        assert not params[name] or kind in (int, str, list)

    # Encoded slashes in terms and signature must pass through unchanged
    params = get_parameters({
        "terms": ["some", "terms/sig"],
        "signature": "my/little/signature"
    })
    assert "signature" in params
    assert "terms" in params
    assert params["terms"] == ["some", "terms/sig"]
    assert params["signature"] == "my/little/signature"
def test_build_query_from_params(): """ Test ElasticSearchBase.build_query_from_params() """ # Test with all default parameters args = { "config": get_dummy_context() } search = ElasticSearchBase(**args) params = {} params = scommon.get_parameters(params) query = ElasticSearchBase.build_query_from_params(params) assert query, "build_query_from_params returned a bad value: %s" % query assert "query" in query, ( "query is malformed, 'query' key missing: %s" % query) assert "size" in query, ( "query is malformed, 'size' key missing: %s" % query) assert "from" in query, ( "query is malformed, 'from' key missing: %s" % query) # Searching for a term in a specific field and with a specific product params = { "terms": "hang", "fields": "dump", "search_mode": "contains", "products": "fennec" } params = scommon.get_parameters(params) query = ElasticSearchBase.build_query_from_params(params) assert query, "build_query_from_params returned a bad value: %s" % query assert "query" in query, ( "query is malformed, 'query' key missing: %s" % query) assert "filtered" in query["query"], ( "query is malformed, 'filtered' key missing: %s" % query) filtered = query["query"]["filtered"] assert "query" in filtered, ( "query is malformed, 'query' key missing: %s" % query) assert "wildcard" in filtered["query"], ( "query is malformed, 'wildcard' key missing: %s" % query) assert "dump" in filtered["query"]["wildcard"], ( "query is malformed, 'dump' key missing: %s" % query) dump_term = filtered["query"]["wildcard"]["dump"] assert "*hang*" == dump_term, ( "query is malformed, value for wildcard is wrong: %s" % query) assert "filter" in filtered, ( "query is malformed, 'filter' key missing: %s" % query) assert "and" in filtered["filter"], ( "query is malformed, 'and' key missing: %s" % query)
def get(self, **kwargs): """ Search for crashes and return them. See http://socorro.readthedocs.org/en/latest/middleware.html#search Optional arguments: see SearchCommon.get_parameters() """ # change aliases from the web to the implementation's need if "for" in kwargs and "terms" not in kwargs: kwargs["terms"] = kwargs.get("for") if "from" in kwargs and "from_date" not in kwargs: kwargs["from_date"] = kwargs.get("from") if "to" in kwargs and "to_date" not in kwargs: kwargs["to_date"] = kwargs.get("to") if "in" in kwargs and "fields" not in kwargs: kwargs["fields"] = kwargs.get("in") params = search_common.get_parameters(kwargs) # Get information about the versions versions_service = Util(config=self.context) params["versions_info"] = versions_service.versions_info(**params) # Changing the OS ids to OS names for i, elem in enumerate(params["os"]): for platform in self.config.platforms: if platform["id"][:3] == elem[:3]: # the split is here to remove 'nt' from 'windows nt' # and 'os x' from 'mac os x' params["os"][i] = platform["name"].split(' ')[0] query = Search.build_query_from_params(params, self.config) # For signatures mode, we need to collect more data with facets if params["data_type"] == "signatures": # No need to get crashes, we only want signatures query["size"] = 0 query["from"] = 0 # Using a fixed number instead of the needed number. # This hack limits the number of distinct signatures to process, # and hugely improves performances with long queries. query["facets"] = Search.get_signatures_facet( self.config.searchMaxNumberOfDistinctSignatures) json_query = json.dumps(query) logger.debug("Query the crashes or signatures: %s", json_query) es_result = self.query(params["from_date"], params["to_date"], json_query) # Executing the query and returning the result if params["data_type"] == "signatures": return self.search_for_signatures(params, es_result, query) else: return es_result
def get(self, **kwargs): """ Search for crashes and return them. See http://socorro.readthedocs.org/en/latest/middleware.html#search Optional arguments: see SearchCommon.get_parameters() """ # change aliases from the web to the implementation's need if "for" in kwargs and "terms" not in kwargs: kwargs["terms"] = kwargs.get("for") if "from" in kwargs and "from_date" not in kwargs: kwargs["from_date"] = kwargs.get("from") if "to" in kwargs and "to_date" not in kwargs: kwargs["to_date"] = kwargs.get("to") if "in" in kwargs and "fields" not in kwargs: kwargs["fields"] = kwargs.get("in") params = search_common.get_parameters(kwargs) # Get information about the versions versions_service = Util(config=self.context) params["versions_info"] = versions_service.versions_info(**params) query = Search.build_query_from_params(params, self.config) # For signatures mode, we need to collect more data with facets if params["data_type"] == "signatures": # No need to get crashes, we only want signatures query["size"] = 0 query["from"] = 0 # Using a fixed number instead of the needed number. # This hack limits the number of distinct signatures to process, # and hugely improves performances with long queries. query["facets"] = Search.get_signatures_facet( self.config.searchMaxNumberOfDistinctSignatures ) json_query = json.dumps(query) logger.debug("Query the crashes or signatures: %s", json_query) es_result = self.query(params["from_date"], params["to_date"], json_query) # Executing the query and returning the result if params["data_type"] == "signatures": return self.search_for_signatures(params, es_result, query) else: return es_result
def prepare_search_params(self, **kwargs):
    """Return a dictionary of parameters for a search-like SQL query.

    Uses socorro.lib.search_common.get_parameters() for arguments
    filtering.

    Raises MissingOrBadArgumentError if 'signature' is missing or empty.
    """
    params = search_common.get_parameters(kwargs)

    # 'signature' is the only mandatory argument
    if not params["signature"]:
        raise MissingOrBadArgumentError(
            "Mandatory parameter 'signature' is missing or empty"
        )

    # The signature becomes the search term, matched exactly
    params["terms"] = params["signature"]
    params["search_mode"] = "is_exactly"

    # Default mode falls back to starts_with for postgres
    if params["plugin_search_mode"] == "default":
        params["plugin_search_mode"] = "starts_with"

    # Searching for terms in plugins
    if params["report_process"] == "plugin" and params["plugin_terms"]:
        params["plugin_terms"] = " ".join(params["plugin_terms"])
        params["plugin_terms"] = Crashes.prepare_terms(
            params["plugin_terms"],
            params["plugin_search_mode"]
        )

    # Get information about the versions
    util_service = Util(config=self.context)
    params["versions_info"] = util_service.versions_info(**params)

    # Parsing the versions
    params["versions_string"] = params["versions"]
    (params["versions"], params["products"]) = Crashes.parse_versions(
        params["versions"],
        params["products"]
    )

    # Changing the OS ids to OS names
    if hasattr(self.context, 'webapi'):
        context = self.context.webapi
    else:
        # old middleware
        context = self.context
    # NOTE: this variant compares full platform ids, not the [:3] prefix
    # match used elsewhere in this file.
    for i, elem in enumerate(params["os"]):
        for platform in context.platforms:
            if platform["id"] == elem:
                params["os"][i] = platform["name"]

    return params
def prepare_search_params(self, **kwargs):
    """Return a dictionary of parameters for a search-like SQL query.

    Uses socorro.lib.search_common.get_parameters() for arguments
    filtering.

    Raises MissingArgumentError if 'signature' is missing or empty.
    """
    params = search_common.get_parameters(kwargs)

    # 'signature' is the only mandatory argument
    if not params["signature"]:
        raise MissingArgumentError('signature')

    # The signature becomes the search term, matched exactly
    params["terms"] = params["signature"]
    params["search_mode"] = "is_exactly"

    # Default mode falls back to starts_with for postgres
    if params["plugin_search_mode"] == "default":
        params["plugin_search_mode"] = "starts_with"

    # Searching for terms in plugins
    if params["report_process"] == "plugin" and params["plugin_terms"]:
        params["plugin_terms"] = " ".join(params["plugin_terms"])
        params["plugin_terms"] = Crashes.prepare_terms(
            params["plugin_terms"],
            params["plugin_search_mode"]
        )

    # Get information about the versions
    util_service = Util(config=self.context)
    params["versions_info"] = util_service.versions_info(**params)

    # Parsing the versions
    params["versions_string"] = params["versions"]
    (params["versions"], params["products"]) = Crashes.parse_versions(
        params["versions"],
        params["products"]
    )

    # Changing the OS ids to OS names
    if hasattr(self.context, 'webapi'):
        context = self.context.webapi
    else:
        # old middleware
        context = self.context
    # NOTE: this variant compares full platform ids, not the [:3] prefix
    # match used elsewhere in this file.
    for i, elem in enumerate(params["os"]):
        for platform in context.platforms:
            if platform["id"] == elem:
                params["os"][i] = platform["name"]

    return params
def search(self, **kwargs):
    """
    Search for crashes and return them.

    See http://socorro.readthedocs.org/en/latest/middleware.html#search

    Optional arguments: see SearchCommon.get_parameters()
    """
    params = search_common.get_parameters(kwargs)

    # Get information about the versions
    versions_service = Util(config=self.context)
    params["versions_info"] = versions_service.versions_info(**params)

    query = Search.build_query_from_params(params, self.context)

    # For signatures mode, we need to collect more data with facets
    if params["data_type"] == "signatures":
        # No need to get crashes, we only want signatures
        query["size"] = 0
        query["from"] = 0

        # Using a fixed number instead of the needed number.
        # This hack limits the number of distinct signatures to process,
        # and hugely improves performances with long queries.
        try:
            # NOTE(review): attribute access raising KeyError suggests
            # self.context is a DotDict-like mapping here — confirm.
            context = self.context.webapi
        except KeyError:
            # old middleware
            context = self.context
        query["facets"] = Search.get_signatures_facet(
            context.searchMaxNumberOfDistinctSignatures)

    json_query = json.dumps(query)
    logger.debug("Query the crashes or signatures: %s", json_query)

    # Executing the query against elasticsearch
    es_result = self.query(params["from_date"], params["to_date"],
                           json_query)

    # Executing the query and returning the result
    if params["data_type"] == "signatures":
        return self.search_for_signatures(params, es_result, query)
    else:
        return es_result
def test_build_reports_sql_where():
    """Check PostgreSQLBase.build_reports_sql_where() with default params."""
    base = get_instance()
    # default search parameters
    defaults = search_common.get_parameters({})

    # Test 1: default values for parameters
    expected_sql = (
        "WHERE r.date_processed BETWEEN %(from_date)s AND %(to_date)s")
    expected_params = {
        "from_date": defaults.from_date,
        "to_date": defaults.to_date,
    }

    (sql, built_params) = base.build_reports_sql_where(defaults, {})
    # collapse every whitespace run so layout differences are ignored
    sql = " ".join(sql.split())

    assert sql == expected_sql, "Expected sql to be %s, got %s instead" % (
        expected_sql, sql)
    assert built_params == expected_params, (
        "Expected sql params to be %s, got "
        "%s instead" % (expected_params, built_params))
def test_build_query_from_params(self):
    """Check build_query_from_params() output, including version filters."""
    # Test with all default parameters
    config = self.get_dummy_context()
    params = {}
    params = scommon.get_parameters(params)
    query = ElasticSearchBase.build_query_from_params(params, config)
    self.assertTrue(query)
    self.assertTrue("query" in query)
    self.assertTrue("size" in query)
    self.assertTrue("from" in query)

    # Searching for a term in a specific field and with a specific product
    params = {
        "terms": "hang",
        "fields": "dump",
        "search_mode": "contains",
        "products": "fennec"
    }
    params = scommon.get_parameters(params)
    query = ElasticSearchBase.build_query_from_params(params, config)
    self.assertTrue(query)
    self.assertTrue("query" in query)
    self.assertTrue("filtered" in query["query"])

    filtered = query["query"]["filtered"]
    self.assertTrue("query" in filtered)
    self.assertTrue("wildcard" in filtered["query"])
    self.assertTrue("dump" in filtered["query"]["wildcard"])

    # "contains" search mode must wrap the term in wildcards
    dump_term = filtered["query"]["wildcard"]["dump"]
    self.assertEqual(dump_term, "*hang*")

    self.assertTrue("filter" in filtered)
    self.assertTrue("and" in filtered["filter"])

    # Test versions
    params = {
        "products": "WaterWolf",
        "versions": "WaterWolf:1.0a1"
    }
    params = scommon.get_parameters(params)
    # versions_info is normally filled by the versions service; faked here
    params['versions_info'] = {
        'WaterWolf:1.0a1': {
            "version_string": "1.0a1",
            "product_name": "WaterWolf",
            "major_version": "1.0a1",
            "release_channel": "nightly-water",
            "build_id": None
        }
    }
    query = ElasticSearchBase.build_query_from_params(params, config)
    filtered = query["query"]["filtered"]
    self.assertTrue("and" in filtered["filter"])

    # Serialize the 'and' filter so its content can be string-matched
    and_filter_str = json.dumps(filtered["filter"]['and'])
    self.assertTrue('WaterWolf' in and_filter_str)
    self.assertTrue('1.0a1' in and_filter_str)
    self.assertTrue('nightly-water' in and_filter_str)

    # Test versions with an empty release channel in versions_info
    params = {
        "products": "WaterWolf",
        "versions": "WaterWolf:2.0"
    }
    params = scommon.get_parameters(params)
    params['versions_info'] = {
        'WaterWolf:2.0': {
            "version_string": "2.0",
            "product_name": "WaterWolf",
            "major_version": "2.0",
            "release_channel": None,
            "build_id": None
        }
    }
    query = ElasticSearchBase.build_query_from_params(params, config)
    filtered = query["query"]["filtered"]
    self.assertTrue("and" in filtered["filter"])

    and_filter_str = json.dumps(filtered["filter"]['and'])
    self.assertTrue('WaterWolf' in and_filter_str)
    self.assertTrue('2.0' in and_filter_str)
def get_list(self, **kwargs):
    """
    List all crashes with a given signature and return them.

    Both `from_date` and `to_date` (and their aliases `from` and `to`)
    are required and can not be greater than 30 days apart.

    Optional arguments: see SearchCommon.get_parameters()
    """
    # aliases
    if "from" in kwargs and "from_date" not in kwargs:
        kwargs["from_date"] = kwargs.get("from")
    if "to" in kwargs and "to_date" not in kwargs:
        kwargs["to_date"] = kwargs.get("to")

    if not kwargs.get('from_date'):
        raise MissingArgumentError('from_date')
    if not kwargs.get('to_date'):
        raise MissingArgumentError('to_date')

    # Enforce the 30-day maximum span between the two dates
    from_date = datetimeutil.datetimeFromISOdateString(kwargs['from_date'])
    to_date = datetimeutil.datetimeFromISOdateString(kwargs['to_date'])
    span_days = (to_date - from_date).days
    if span_days > 30:
        raise BadArgumentError(
            'Span between from_date and to_date can not be more than 30')

    # start with the default
    sort_order = {'key': 'date_processed', 'direction': 'DESC'}
    if 'sort' in kwargs:
        sort_order['key'] = kwargs.pop('sort')
        _recognized_sort_orders = (
            'date_processed', 'uptime', 'user_comments', 'uuid',
            'uuid_text', 'product', 'version', 'build', 'signature',
            'url', 'os_name', 'os_version', 'cpu_name', 'cpu_info',
            'address', 'reason', 'last_crash', 'install_age', 'hangid',
            'process_type', 'release_channel', 'install_time',
            'duplicate_of',
        )
        if sort_order['key'] not in _recognized_sort_orders:
            raise BadArgumentError(
                '%s is not a recognized sort order key' % sort_order['key'])
        # an explicit sort key defaults to ascending order
        sort_order['direction'] = 'ASC'
    if 'reverse' in kwargs:
        if kwargs.pop('reverse'):
            sort_order['direction'] = 'DESC'

    include_raw_crash = kwargs.get('include_raw_crash') or False
    params = search_common.get_parameters(kwargs)

    if not params["signature"]:
        raise MissingArgumentError('signature')

    params["terms"] = params["signature"]
    params["search_mode"] = "is_exactly"

    # Default mode falls back to starts_with for postgres
    if params["plugin_search_mode"] == "default":
        params["plugin_search_mode"] = "starts_with"

    # Limiting to a signature
    if params["terms"]:
        params["terms"] = self.prepare_terms(params["terms"],
                                             params["search_mode"])

    # Searching for terms in plugins
    if params["report_process"] == "plugin" and params["plugin_terms"]:
        params["plugin_terms"] = " ".join(params["plugin_terms"])
        params["plugin_terms"] = self.prepare_terms(
            params["plugin_terms"], params["plugin_search_mode"])

    # Get information about the versions
    util_service = Util(config=self.context)
    params["versions_info"] = util_service.versions_info(**params)

    # Parsing the versions
    params["versions_string"] = params["versions"]
    (params["versions"], params["products"]) = self.parse_versions(
        params["versions"], params["products"])

    if hasattr(self.context, 'webapi'):
        context = self.context.webapi
    else:
        # old middleware
        context = self.context

    # Changing the OS ids to OS names
    for i, elem in enumerate(params["os"]):
        for platform in context.platforms:
            if platform["id"][:3] == elem[:3]:
                params["os"][i] = platform["name"]

    # Creating the parameters for the sql query
    sql_params = {}

    # Preparing the different parts of the sql query
    sql_select = """
        SELECT
            r.date_processed,
            r.uptime,
            r.user_comments,
            r.uuid::uuid,
            r.uuid as uuid_text,
            r.product,
            r.version,
            r.build,
            r.signature,
            r.url,
            r.os_name,
            r.os_version,
            r.cpu_name,
            r.cpu_info,
            r.address,
            r.reason,
            r.last_crash,
            r.install_age,
            r.hangid,
            r.process_type,
            r.release_channel,
            (r.client_crash_date - (r.install_age * INTERVAL '1 second'))
                AS install_time
    """
    if include_raw_crash:
        pass
    else:
        # duplicate_of comes from the reports_duplicates join below;
        # in raw-crash mode it is provided by wrapped_select instead
        sql_select += """
            , rd.duplicate_of
        """

    # Outer query used only in raw-crash mode: wraps the base query and
    # joins duplicates and the raw_crashes table.  %% survives the later
    # `% sql_query` substitution as literal psycopg2 placeholders.
    wrapped_select = """
        WITH report_slice AS (
            %s
        ), dupes AS (
            SELECT
                report_slice.uuid,
                rd.duplicate_of
            FROM reports_duplicates rd
            JOIN report_slice ON report_slice.uuid_text = rd.uuid
            WHERE
                rd.date_processed BETWEEN %%(from_date)s AND %%(to_date)s
        )

        SELECT
            rs.*,
            dupes.duplicate_of,
            rc.raw_crash
        FROM report_slice rs
        LEFT OUTER JOIN dupes USING (uuid)
        LEFT OUTER JOIN raw_crashes rc ON
            rs.uuid = rc.uuid
            AND
            rc.date_processed BETWEEN %%(from_date)s AND %%(to_date)s
    """

    sql_from = self.build_reports_sql_from(params)

    if not include_raw_crash:
        sql_from = """%s
            LEFT OUTER JOIN reports_duplicates rd ON r.uuid = rd.uuid
        """ % sql_from

    sql_where, sql_params = self.build_reports_sql_where(
        params, sql_params, self.context)

    sql_order = """
        ORDER BY %(key)s %(direction)s
    """ % sort_order

    sql_limit, sql_params = self.build_reports_sql_limit(
        params, sql_params)

    # Assembling the query
    # NOTE(review): both branches of this if/else are identical; the
    # conditional looks like a leftover and could be collapsed.
    if include_raw_crash:
        sql_query = "\n".join(
            ("/* socorro.external.postgresql.report.Report.list */",
             sql_select, sql_from, sql_where, sql_order, sql_limit))
    else:
        sql_query = "\n".join(
            ("/* socorro.external.postgresql.report.Report.list */",
             sql_select, sql_from, sql_where, sql_order, sql_limit))

    # Query for counting the results
    sql_count_query = "\n".join(
        ("/* socorro.external.postgresql.report.Report.list */",
         "SELECT count(*)", sql_from, sql_where))

    # Querying the DB
    with self.get_connection() as connection:
        total = self.count(
            sql_count_query,
            sql_params,
            error_message="Failed to count crashes from reports.",
            connection=connection)

        # No need to call Postgres if we know there will be no results
        if total:
            if include_raw_crash:
                sql_query = wrapped_select % sql_query
            results = self.query(
                sql_query, sql_params,
                error_message="Failed to retrieve crashes from reports",
                connection=connection)
        else:
            results = []

    # Transforming the results into what we want
    # NOTE(review): "uuid" appears twice so dict(zip(...)) keeps only the
    # second (uuid::text) value — confirm that is the intent.
    fields = (
        "date_processed",
        "uptime",
        "user_comments",
        "uuid",
        "uuid",  # the uuid::text one
        "product",
        "version",
        "build",
        "signature",
        "url",
        "os_name",
        "os_version",
        "cpu_name",
        "cpu_info",
        "address",
        "reason",
        "last_crash",
        "install_age",
        "hangid",
        "process_type",
        "release_channel",
        "install_time",
        "duplicate_of",
    )
    if include_raw_crash:
        fields += ("raw_crash", )
    crashes = []
    for row in results:
        crash = dict(zip(fields, row))
        if include_raw_crash and crash['raw_crash']:
            crash['raw_crash'] = json.loads(crash['raw_crash'])
        # Stringify every date-like value; non-dates raise TypeError and
        # are left untouched
        for i in crash:
            try:
                crash[i] = datetimeutil.date_to_string(crash[i])
            except TypeError:
                pass
        crashes.append(crash)

    return {"hits": crashes, "total": total}
def get_list(self, **kwargs):
    """
    List all crashes with a given signature and return them.

    Optional arguments: see SearchCommon.get_parameters()

    Raises MissingOrBadArgumentError when 'signature' is missing or
    empty, DatabaseError on any psycopg2 failure.

    Fix: `connection` is now initialized to None before the try block.
    Previously, if self.database.connection() itself raised, the finally
    clause referenced an unbound name and a NameError masked the real
    database error.
    """
    # aliases
    if "from" in kwargs and "from_date" not in kwargs:
        kwargs["from_date"] = kwargs.get("from")
    if "to" in kwargs and "to_date" not in kwargs:
        kwargs["to_date"] = kwargs.get("to")

    params = search_common.get_parameters(kwargs)

    if not params["signature"]:
        raise MissingOrBadArgumentError(
            "Mandatory parameter 'signature' is missing or empty")

    # The signature becomes the search term, matched exactly
    params["terms"] = params["signature"]
    params["search_mode"] = "is_exactly"

    # Default mode falls back to starts_with for postgres
    if params["plugin_search_mode"] == "default":
        params["plugin_search_mode"] = "starts_with"

    # Limiting to a signature
    if params["terms"]:
        params["terms"] = self.prepare_terms(params["terms"],
                                             params["search_mode"])

    # Searching for terms in plugins
    if params["report_process"] == "plugin" and params["plugin_terms"]:
        params["plugin_terms"] = " ".join(params["plugin_terms"])
        params["plugin_terms"] = self.prepare_terms(
            params["plugin_terms"], params["plugin_search_mode"])

    # Get information about the versions
    util_service = Util(config=self.context)
    params["versions_info"] = util_service.versions_info(**params)

    # Parsing the versions
    params["versions_string"] = params["versions"]
    (params["versions"], params["products"]) = self.parse_versions(
        params["versions"], params["products"])

    if hasattr(self.context, "webapi"):
        context = self.context.webapi
    else:
        # old middleware
        context = self.context

    # Changing the OS ids to OS names
    for i, elem in enumerate(params["os"]):
        for platform in context.platforms:
            if platform["id"][:3] == elem[:3]:
                params["os"][i] = platform["name"]

    # Creating the parameters for the sql query
    sql_params = {}

    # Preparing the different parts of the sql query
    sql_select = """
        SELECT
            r.date_processed,
            r.uptime,
            r.user_comments,
            r.uuid,
            r.product,
            r.version,
            r.build,
            r.signature,
            r.url,
            r.os_name,
            r.os_version,
            r.cpu_name,
            r.cpu_info,
            r.address,
            r.reason,
            r.last_crash,
            r.install_age,
            r.hangid,
            r.process_type,
            (r.client_crash_date - (r.install_age * INTERVAL '1 second'))
                AS install_time,
            rd.duplicate_of
    """

    sql_from = self.build_reports_sql_from(params)
    sql_from = (
        """%s
        LEFT OUTER JOIN reports_duplicates rd ON r.uuid = rd.uuid
        """ % sql_from
    )

    (sql_where, sql_params) = self.build_reports_sql_where(
        params, sql_params, self.context)

    sql_order = """
        ORDER BY r.date_processed DESC
    """

    (sql_limit, sql_params) = self.build_reports_sql_limit(
        params, sql_params)

    # Assembling the query
    sql_query = " ".join(
        ("/* socorro.external.postgresql.report.Report.list */",
         sql_select, sql_from, sql_where, sql_order, sql_limit))

    # Query for counting the results
    sql_count_query = " ".join(
        ("/* socorro.external.postgresql.report.Report.list */",
         "SELECT count(*)", sql_from, sql_where))

    # Querying the DB
    connection = None  # keep the name bound for the finally clause
    try:
        connection = self.database.connection()

        total = self.count(
            sql_count_query,
            sql_params,
            error_message="Failed to count crashes from PostgreSQL.",
            connection=connection)

        results = []

        # No need to call Postgres if we know there will be no results
        if total != 0:
            results = self.query(
                sql_query,
                sql_params,
                error_message="Failed to retrieve crashes from PostgreSQL",
                connection=connection)
    except psycopg2.Error:
        raise DatabaseError("Failed to retrieve crashes from PostgreSQL")
    finally:
        if connection:
            connection.close()

    # Transforming the results into what we want
    crashes = []
    for row in results:
        crash = dict(zip((
            "date_processed",
            "uptime",
            "user_comments",
            "uuid",
            "product",
            "version",
            "build",
            "signature",
            "url",
            "os_name",
            "os_version",
            "cpu_name",
            "cpu_info",
            "address",
            "reason",
            "last_crash",
            "install_age",
            "hangid",
            "process_type",
            "install_time",
            "duplicate_of",
        ), row))
        # Stringify every date-like value; non-dates raise TypeError and
        # are left untouched
        for i in crash:
            try:
                crash[i] = datetimeutil.date_to_string(crash[i])
            except TypeError:
                pass
        crashes.append(crash)

    return {"hits": crashes, "total": total}
def search(self, **kwargs):
    """
    Search for crashes and return them.

    See http://socorro.readthedocs.org/en/latest/middleware.html#search

    Optional arguments: see SearchCommon.get_parameters()

    Fix: the database connection is now closed in a finally clause.
    Previously self.connection.close() only ran on the success path, so
    any exception raised while building or running the query leaked the
    connection.
    """
    # Creating the connection to the DB
    self.connection = self.database.connection()
    try:
        cur = self.connection.cursor()

        params = search_common.get_parameters(kwargs)

        # Default mode falls back to starts_with for postgres
        if params["search_mode"] == "default":
            params["search_mode"] = "starts_with"
        if params["plugin_search_mode"] == "default":
            params["plugin_search_mode"] = "starts_with"

        # For Postgres, we never search for a list of terms
        if params["terms"]:
            params["terms"] = " ".join(params["terms"])
            params["terms"] = Search.prepare_terms(params["terms"],
                                                   params["search_mode"])

        # Searching for terms in plugins
        if params["report_process"] == "plugin" and params["plugin_terms"]:
            params["plugin_terms"] = " ".join(params["plugin_terms"])
            params["plugin_terms"] = Search.prepare_terms(
                params["plugin_terms"], params["plugin_search_mode"])

        # Get information about the versions
        util_service = Util(config=self.context)
        params["versions_info"] = util_service.versions_info(**params)

        # Parsing the versions
        params["versions_string"] = params["versions"]
        (params["versions"], params["products"]) = Search.parse_versions(
            params["versions"], params["products"])

        # Changing the OS ids to OS names
        for i, elem in enumerate(params["os"]):
            for platform in self.context.platforms:
                if platform["id"] == elem:
                    params["os"][i] = platform["name"]

        # Creating the parameters for the sql query
        sql_params = {}

        # Preparing the different parts of the sql query
        sql_select = self.generate_sql_select(params)

        # Adding count for each OS
        for i in self.context.platforms:
            sql_params["os_%s" % i["id"]] = i["name"]

        sql_from = self.build_reports_sql_from(params)

        (sql_where, sql_params) = self.build_reports_sql_where(
            params, sql_params, self.context)

        sql_group = self.generate_sql_group(params)

        sql_order = """
            ORDER BY total DESC, signature
        """

        (sql_limit, sql_params) = self.build_reports_sql_limit(
            params, sql_params)

        # Assembling the query
        sql_query = " ".join(
            ("/* socorro.search.Search search */",
             sql_select, sql_from, sql_where, sql_group, sql_order,
             sql_limit))

        # Query for counting the results
        sql_count_query = " ".join(
            ("/* socorro.external.postgresql.search.Search search.count */",
             "SELECT count(DISTINCT r.signature)", sql_from, sql_where))

        # Debug
        logger.debug(cur.mogrify(sql_query, sql_params))

        # Querying the DB; a missing single value means no results
        try:
            total = db.singleValueSql(cur, sql_count_query, sql_params)
        except db.SQLDidNotReturnSingleValue:
            total = 0
            util.reportExceptionAndContinue(logger)

        results = []

        # No need to call Postgres if we know there will be no results
        if total != 0:
            try:
                results = db.execute(cur, sql_query, sql_params)
            except psycopg2.Error:
                util.reportExceptionAndContinue(logger)

        json_result = {"total": total, "hits": []}

        # Transforming the results into what we want
        for crash in results:
            if params["report_process"] == "plugin":
                row = dict(zip(("signature", "count", "is_windows",
                                "is_mac", "is_linux", "numhang",
                                "numplugin", "numcontent", "pluginname",
                                "pluginversion", "pluginfilename"),
                               crash))
            else:
                row = dict(zip(("signature", "count", "is_windows",
                                "is_mac", "is_linux", "numhang",
                                "numplugin", "numcontent"),
                               crash))
            json_result["hits"].append(row)

        return json_result
    finally:
        # Always release the connection, even when an exception escapes
        self.connection.close()
def get(self, **kwargs):
    """
    Search for crashes and return them.

    See http://socorro.readthedocs.org/en/latest/middleware.html#search

    Optional arguments: see SearchCommon.get_parameters()

    Returns {"hits": [<crash dict>, ...], "total": <int>}.
    Raises DatabaseError if PostgreSQL cannot be queried.
    """
    # Change aliases from the web to the implementation's need.
    # FIX: this must operate on the raw kwargs BEFORE get_parameters()
    # normalizes them -- get_parameters() returns only its own known keys,
    # so "for"/"from"/"to"/"in" could never appear in its result and the
    # aliasing was dead code. The sibling implementation of get() in this
    # file already does it this way.
    if "for" in kwargs and "terms" not in kwargs:
        kwargs["terms"] = kwargs.get("for")
    if "from" in kwargs and "from_date" not in kwargs:
        kwargs["from_date"] = kwargs.get("from")
    if "to" in kwargs and "to_date" not in kwargs:
        kwargs["to_date"] = kwargs.get("to")
    if "in" in kwargs and "fields" not in kwargs:
        kwargs["fields"] = kwargs.get("in")

    params = search_common.get_parameters(kwargs)

    # Default mode falls back to starts_with for postgres
    if params["search_mode"] == "default":
        params["search_mode"] = "starts_with"
    if params["plugin_search_mode"] == "default":
        params["plugin_search_mode"] = "starts_with"

    # For Postgres, we never search for a list of terms
    if params["terms"]:
        params["terms"] = " ".join(params["terms"])
        params["terms"] = Search.prepare_terms(params["terms"],
                                               params["search_mode"])

    # Searching for terms in plugins
    if params["report_process"] == "plugin" and params["plugin_terms"]:
        params["plugin_terms"] = " ".join(params["plugin_terms"])
        params["plugin_terms"] = Search.prepare_terms(
            params["plugin_terms"], params["plugin_search_mode"])

    # Get information about the versions
    util_service = Util(config=self.context)
    params["versions_info"] = util_service.versions_info(**params)

    # Parsing the versions
    params["versions_string"] = params["versions"]
    (params["versions"], params["products"]) = Search.parse_versions(
        params["versions"], params["products"])

    if hasattr(self.context, 'webapi'):
        context = self.context.webapi
    else:
        # old middleware
        context = self.context

    # Changing the OS ids to OS names.
    # NOTE(review): prefix match on the first three characters -- presumably
    # so a short id like "win" matches; confirm against the platforms config.
    for i, elem in enumerate(params["os"]):
        for platform in context.platforms:
            if platform["id"][:3] == elem[:3]:
                params["os"][i] = platform["name"]

    # Creating the parameters for the sql query
    sql_params = {}

    # Preparing the different parts of the sql query
    sql_select = self.generate_sql_select(params)

    # Adding count for each OS
    for i in context.platforms:
        sql_params["os_%s" % i["id"]] = i["name"]

    sql_from = self.build_reports_sql_from(params)
    (sql_where, sql_params) = self.build_reports_sql_where(params,
                                                           sql_params,
                                                           context)
    sql_group = self.generate_sql_group(params)
    sql_order = """
        ORDER BY total DESC, signature
    """
    (sql_limit, sql_params) = self.build_reports_sql_limit(params,
                                                           sql_params)

    # Assembling the query
    sql_query = " ".join(
        ("/* socorro.search.Search search */", sql_select, sql_from,
         sql_where, sql_group, sql_order, sql_limit))

    # Query for counting the results
    sql_count_query = " ".join(
        ("/* socorro.external.postgresql.search.Search search.count */",
         "SELECT count(DISTINCT r.signature)", sql_from, sql_where))

    # Querying the database.
    # FIX: initialize connection before the try block -- if
    # self.database.connection() raised, the finally clause referenced an
    # unbound local and masked the original error with a NameError.
    connection = None
    try:
        connection = self.database.connection()

        total = self.count(
            sql_count_query,
            sql_params,
            error_message="Failed to count crashes from PostgreSQL.",
            connection=connection)

        results = []

        # No need to call Postgres if we know there will be no results
        if total != 0:
            results = self.query(
                sql_query,
                sql_params,
                error_message="Failed to retrieve crashes from PostgreSQL",
                connection=connection)
    except psycopg2.Error:
        raise DatabaseError("Failed to retrieve crashes from PostgreSQL")
    finally:
        if connection:
            connection.close()

    # Transforming the results into what we want
    crashes = []
    for row in results:
        if params["report_process"] == "plugin":
            crash = dict(
                zip(("signature", "count", "is_windows", "is_mac",
                     "is_linux", "numhang", "numplugin", "numcontent",
                     "pluginname", "pluginversion", "pluginfilename"),
                    row))
        else:
            crash = dict(
                zip(("signature", "count", "is_windows", "is_mac",
                     "is_linux", "numhang", "numplugin", "numcontent"),
                    row))
        crashes.append(crash)

    return {"hits": crashes, "total": total}
def get_list(self, **kwargs):
    """
    List all crashes with a given signature and return them.

    Optional arguments: see SearchCommon.get_parameters()

    Returns {"hits": [<crash dict>, ...], "total": <int>}, or None when no
    signature was supplied. Raises DatabaseError on PostgreSQL failure.
    """
    # aliases
    if "from" in kwargs and "from_date" not in kwargs:
        kwargs["from_date"] = kwargs.get("from")
    if "to" in kwargs and "to_date" not in kwargs:
        kwargs["to_date"] = kwargs.get("to")

    params = search_common.get_parameters(kwargs)

    if params["signature"] is None:
        return None

    # This service always filters on one exact signature.
    params["terms"] = params["signature"]
    params["search_mode"] = "is_exactly"

    # Default mode falls back to starts_with for postgres
    if params["plugin_search_mode"] == "default":
        params["plugin_search_mode"] = "starts_with"

    # Limiting to a signature
    if params["terms"]:
        params["terms"] = self.prepare_terms(params["terms"],
                                             params["search_mode"])

    # Searching for terms in plugins
    if params["report_process"] == "plugin" and params["plugin_terms"]:
        params["plugin_terms"] = " ".join(params["plugin_terms"])
        params["plugin_terms"] = self.prepare_terms(
            params["plugin_terms"], params["plugin_search_mode"])

    # Get information about the versions
    util_service = Util(config=self.context)
    params["versions_info"] = util_service.versions_info(**params)

    # Parsing the versions
    params["versions_string"] = params["versions"]
    (params["versions"], params["products"]) = self.parse_versions(
        params["versions"], params["products"])

    if hasattr(self.context, 'webapi'):
        context = self.context.webapi
    else:
        # old middleware
        context = self.context

    # Changing the OS ids to OS names
    for i, elem in enumerate(params["os"]):
        for platform in context.platforms:
            if platform["id"] == elem:
                params["os"][i] = platform["name"]

    # Creating the parameters for the sql query
    sql_params = {}

    # Preparing the different parts of the sql query
    sql_select = """
        SELECT
            r.date_processed,
            r.uptime,
            r.user_comments,
            r.uuid,
            r.product,
            r.version,
            r.build,
            r.signature,
            r.url,
            r.os_name,
            r.os_version,
            r.cpu_name,
            r.cpu_info,
            r.address,
            r.reason,
            r.last_crash,
            r.install_age,
            r.hangid,
            r.process_type,
            (r.client_crash_date - (r.install_age * INTERVAL '1 second'))
                AS install_time,
            rd.duplicate_of
    """

    sql_from = self.build_reports_sql_from(params)
    sql_from = """%s
        LEFT OUTER JOIN reports_duplicates rd ON r.uuid = rd.uuid
    """ % sql_from

    (sql_where, sql_params) = self.build_reports_sql_where(params,
                                                           sql_params,
                                                           self.context)

    sql_order = """
        ORDER BY r.date_processed DESC
    """

    (sql_limit, sql_params) = self.build_reports_sql_limit(params,
                                                           sql_params)

    # Assembling the query
    sql_query = " ".join(
        ("/* socorro.external.postgresql.report.Report.list */",
         sql_select, sql_from, sql_where, sql_order, sql_limit))

    # Query for counting the results
    sql_count_query = " ".join(
        ("/* socorro.external.postgresql.report.Report.list */",
         "SELECT count(*)", sql_from, sql_where))

    # Querying the DB.
    # FIX: initialize connection before the try block -- if
    # self.database.connection() raised, the finally clause referenced an
    # unbound local and masked the original error with a NameError.
    connection = None
    try:
        connection = self.database.connection()

        total = self.count(
            sql_count_query,
            sql_params,
            error_message="Failed to count crashes from PostgreSQL.",
            connection=connection)

        results = []

        # No need to call Postgres if we know there will be no results
        if total != 0:
            results = self.query(
                sql_query,
                sql_params,
                error_message="Failed to retrieve crashes from PostgreSQL",
                connection=connection)
    except psycopg2.Error:
        raise DatabaseError("Failed to retrieve crashes from PostgreSQL")
    finally:
        if connection:
            connection.close()

    # Transforming the results into what we want
    crashes = []
    for row in results:
        crash = dict(
            zip(("date_processed", "uptime", "user_comments", "uuid",
                 "product", "version", "build", "signature", "url",
                 "os_name", "os_version", "cpu_name", "cpu_info",
                 "address", "reason", "last_crash", "install_age",
                 "hangid", "process_type", "install_time",
                 "duplicate_of"), row))
        # Stringify datetime fields so the result is JSON-serializable;
        # non-date values raise TypeError and are left untouched.
        for i in crash:
            try:
                crash[i] = datetimeutil.date_to_string(crash[i])
            except TypeError:
                pass
        crashes.append(crash)

    return {"hits": crashes, "total": total}
def test_build_reports_sql_where(self):
    """ Test PostgreSQLBase.build_reports_sql_where()."""
    config = self.get_dummy_context()
    pgbase = self.get_instance()
    params = search_common.get_parameters({})  # Get default search params
    default_params = util.DotDict(params.copy())
    sql_params = {}

    # .....................................................................
    # Test 1: default values for parameters
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 2: terms and search_mode = is_exactly
    sql_params = {}
    params.terms = "signature"
    params.search_mode = "is_exactly"
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND r.signature=%(term)s"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date,
        "term": params.terms
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 3: terms and search_mode != is_exactly
    sql_params = {}
    params.terms = "signature%"
    params.search_mode = "starts_with"
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND r.signature LIKE %(term)s"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date,
        "term": params.terms
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 4: products
    sql_params = {}
    params.terms = default_params.terms
    params.search_mode = default_params.search_mode
    params.products = ["Firefox", "Fennec"]
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND (r.product=%(product0)s OR " \
              "r.product=%(product1)s)"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date,
        "product0": "Firefox",
        "product1": "Fennec"
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 5: os
    sql_params = {}
    params.products = default_params.products
    params.os = ["Windows"]
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND (r.os_name=%(os0)s)"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date,
        "os0": "Windows"
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 6: branches
    sql_params = {}
    params.os = default_params.os
    params.branches = ["2.2", "2.3", "4.0"]
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND (branches.branch=%(branch0)s OR " \
              "branches.branch=%(branch1)s OR branches.branch=%(branch2)s)"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date,
        "branch0": "2.2",
        "branch1": "2.3",
        "branch2": "4.0"
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 7: build_ids
    sql_params = {}
    params.branches = default_params.branches
    params.build_ids = ["20120101123456"]
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND (r.build=%(build0)s)"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date,
        "build0": "20120101123456"
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 8: reasons
    sql_params = {}
    params.build_ids = default_params.build_ids
    params.reasons = ["EXCEPTION", "OVERFLOW"]
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND (r.reason=%(reason0)s OR " \
              "r.reason=%(reason1)s)"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date,
        "reason0": "EXCEPTION",
        "reason1": "OVERFLOW"
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 9: report_type
    sql_params = {}
    params.reasons = default_params.reasons
    params.report_type = "crash"
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND r.hangid IS NULL"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 10: versions
    sql_params = {}
    params.report_type = default_params.report_type
    params.versions = [
        "Firefox", "12.0a1", "Fennec", "11.0", "Firefox", "13.0(beta)"
    ]
    params.versions_info = {
        "Firefox:12.0a1": {
            "version_string": "12.0a1",
            "product_name": "Firefox",
            "major_version": "12.0",
            "release_channel": "Nightly",
            "build_id": ["20120101123456"]
        },
        "Fennec:11.0": {
            "version_string": "11.0",
            "product_name": "Fennec",
            "major_version": None,
            "release_channel": None,
            "build_id": None
        },
        "Firefox:13.0(beta)": {
            "version_string": "13.0(beta)",
            "product_name": "Firefox",
            "major_version": "13.0",
            "release_channel": "Beta",
            "build_id": ["20120101123456", "20120101098765"]
        }
    }
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND ((r.product=%(version0)s AND " \
              "r.release_channel ILIKE 'nightly' AND " \
              "r.version=%(version1)s) OR (r.product=%(version2)s AND " \
              "r.version=%(version3)s) OR (r.product=%(version4)s AND " \
              "r.release_channel ILIKE 'beta' AND r.build IN " \
              "('20120101123456', '20120101098765') AND " \
              "r.version=%(version5)s))"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date,
        "version0": "Firefox",
        "version1": "12.0",
        "version2": "Fennec",
        "version3": "11.0",
        "version4": "Firefox",
        "version5": "13.0"
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 11: report_process = plugin
    sql_params = {}
    params.versions = default_params.versions
    # FIX: was `params.versions_infos = None` -- a typo that created a new
    # stray attribute and left Test 10's versions_info in place instead of
    # clearing it.
    params.versions_info = None
    params.report_process = "plugin"
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND r.process_type = 'plugin' AND " \
              "plugins_reports.date_processed BETWEEN " \
              "%(from_date)s AND %(to_date)s"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 12: report_process != plugin
    sql_params = {}
    params.report_process = "content"
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND r.process_type = 'content'"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 13: plugins
    sql_params = {}
    params.report_process = "plugin"
    params.plugin_terms = "plugin_name"
    params.plugin_search_mode = "is_exactly"
    params.plugin_in = ["name"]
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND r.process_type = 'plugin' AND " \
              "plugins_reports.date_processed BETWEEN " \
              "%(from_date)s AND %(to_date)s AND " \
              "(plugins.name=%(plugin_term)s)"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date,
        "plugin_term": params.plugin_terms
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)
def get(self, **kwargs):
    """
    Search for crashes and return them.

    See http://socorro.readthedocs.org/en/latest/middleware.html#search

    Optional arguments: see SearchCommon.get_parameters()

    Returns {"hits": [<crash dict>, ...], "total": <int>}.
    Raises DatabaseError if PostgreSQL cannot be queried.
    """
    # change aliases from the web to the implementation's need -- this is
    # done on the raw kwargs, before get_parameters() normalizes the keys.
    if "for" in kwargs and "terms" not in kwargs:
        kwargs["terms"] = kwargs.get("for")
    if "from" in kwargs and "from_date" not in kwargs:
        kwargs["from_date"] = kwargs.get("from")
    if "to" in kwargs and "to_date" not in kwargs:
        kwargs["to_date"] = kwargs.get("to")
    if "in" in kwargs and "fields" not in kwargs:
        kwargs["fields"] = kwargs.get("in")

    params = search_common.get_parameters(kwargs)

    # Default mode falls back to starts_with for postgres
    if params["search_mode"] == "default":
        params["search_mode"] = "starts_with"
    if params["plugin_search_mode"] == "default":
        params["plugin_search_mode"] = "starts_with"

    # For Postgres, we never search for a list of terms
    if params["terms"]:
        params["terms"] = " ".join(params["terms"])
        params["terms"] = Search.prepare_terms(params["terms"],
                                               params["search_mode"])

    # Searching for terms in plugins
    if params["report_process"] == "plugin" and params["plugin_terms"]:
        params["plugin_terms"] = " ".join(params["plugin_terms"])
        params["plugin_terms"] = Search.prepare_terms(
            params["plugin_terms"], params["plugin_search_mode"])

    # Get information about the versions
    util_service = Util(config=self.context)
    params["versions_info"] = util_service.versions_info(**params)

    # Parsing the versions
    params["versions_string"] = params["versions"]
    (params["versions"], params["products"]) = Search.parse_versions(
        params["versions"], params["products"])

    if hasattr(self.context, 'webapi'):
        context = self.context.webapi
    else:
        # old middleware
        context = self.context

    # Changing the OS ids to OS names
    # NOTE(review): prefix match on the first three characters -- presumably
    # so a short id like "win" matches; confirm against the platforms config.
    for i, elem in enumerate(params["os"]):
        for platform in context.platforms:
            if platform["id"][:3] == elem[:3]:
                params["os"][i] = platform["name"]

    # Creating the parameters for the sql query
    sql_params = {}

    # Preparing the different parts of the sql query
    sql_select = self.generate_sql_select(params)

    # Adding count for each OS
    for i in context.platforms:
        sql_params["os_%s" % i["id"]] = i["name"]

    sql_from = self.build_reports_sql_from(params)
    (sql_where, sql_params) = self.build_reports_sql_where(params,
                                                           sql_params,
                                                           context)
    sql_group = self.generate_sql_group(params)
    sql_order = """
        ORDER BY total DESC, signature
    """
    (sql_limit, sql_params) = self.build_reports_sql_limit(params,
                                                           sql_params)

    # Assembling the query
    sql_query = " ".join(("/* socorro.search.Search search */",
                          sql_select, sql_from, sql_where, sql_group,
                          sql_order, sql_limit))

    # Query for counting the results; shares FROM/WHERE with the main query
    # so the total matches the paged result set.
    sql_count_query = " ".join((
        "/* socorro.external.postgresql.search.Search search.count */",
        "SELECT count(DISTINCT r.signature)", sql_from, sql_where))

    # Querying the database -- the context manager guarantees the
    # connection is released even when an exception propagates.
    error_message = "Failed to retrieve crashes from PostgreSQL"
    with self.get_connection() as connection:
        try:
            total = self.count(
                sql_count_query,
                sql_params,
                error_message="Failed to count crashes from PostgreSQL.",
                connection=connection
            )

            results = []

            # No need to call Postgres if we know there will be no results
            if total != 0:
                results = self.query(
                    sql_query,
                    sql_params,
                    error_message=error_message,
                    connection=connection
                )
        except psycopg2.Error:
            raise DatabaseError(error_message)

    # Transforming the results into what we want: zip each raw row with the
    # column names generate_sql_select produces for this report type.
    crashes = []
    for row in results:
        if params["report_process"] == "plugin":
            crash = dict(zip((
                "signature",
                "count",
                "is_windows",
                "is_mac",
                "is_linux",
                "numhang",
                "numplugin",
                "numcontent",
                "pluginname",
                "pluginversion",
                "pluginfilename"
            ), row))
        else:
            crash = dict(zip((
                "signature",
                "count",
                "is_windows",
                "is_mac",
                "is_linux",
                "numhang",
                "numplugin",
                "numcontent"
            ), row))
        crashes.append(crash)

    return {
        "hits": crashes,
        "total": total
    }
def get_list(self, **kwargs):
    """
    List all crashes with a given signature and return them.

    Optional arguments: see SearchCommon.get_parameters()

    Returns {"total": <int>, "hits": [<crash dict>, ...]}, or None when no
    signature was supplied.
    """
    # Creating the connection to the DB
    # NOTE(review): the connection is stored on self and closed only on the
    # success path at the bottom -- an exception mid-way leaks it; newer
    # implementations in this file use try/finally.
    self.connection = self.database.connection()
    cur = self.connection.cursor()

    params = search_common.get_parameters(kwargs)

    if params["signature"] is None:
        return None

    # This service always filters on one exact signature.
    params["terms"] = params["signature"]
    params["search_mode"] = "is_exactly"

    # Default mode falls back to starts_with for postgres
    if params["plugin_search_mode"] == "default":
        params["plugin_search_mode"] = "starts_with"

    # Limiting to a signature
    if params["terms"]:
        params["terms"] = self.prepare_terms(params["terms"],
                                             params["search_mode"])

    # Searching for terms in plugins
    if params["report_process"] == "plugin" and params["plugin_terms"]:
        params["plugin_terms"] = " ".join(params["plugin_terms"])
        params["plugin_terms"] = self.prepare_terms(
            params["plugin_terms"], params["plugin_search_mode"])

    # Get information about the versions
    util_service = Util(config=self.context)
    params["versions_info"] = util_service.versions_info(**params)

    # Parsing the versions
    params["versions_string"] = params["versions"]
    (params["versions"], params["products"]) = self.parse_versions(
        params["versions"], params["products"])

    # Changing the OS ids to OS names
    for i, elem in enumerate(params["os"]):
        for platform in self.context.platforms:
            if platform["id"] == elem:
                params["os"][i] = platform["name"]

    # Creating the parameters for the sql query
    sql_params = {}

    # Preparing the different parts of the sql query
    sql_select = """
        SELECT
            r.date_processed,
            r.uptime,
            r.user_comments,
            r.uuid,
            r.product,
            r.version,
            r.build,
            r.signature,
            r.url,
            r.os_name,
            r.os_version,
            r.cpu_name,
            r.cpu_info,
            r.address,
            r.reason,
            r.last_crash,
            r.install_age,
            r.hangid,
            r.process_type,
            (r.client_crash_date - (r.install_age * INTERVAL '1 second'))
                AS install_time,
            rd.duplicate_of
    """

    sql_from = self.build_reports_sql_from(params)
    # Join against reports_duplicates so each hit carries duplicate_of.
    sql_from = """%s
        LEFT OUTER JOIN reports_duplicates rd ON r.uuid = rd.uuid
    """ % sql_from

    (sql_where, sql_params) = self.build_reports_sql_where(params,
                                                           sql_params,
                                                           self.context)

    sql_order = """
        ORDER BY r.date_processed DESC
    """

    (sql_limit, sql_params) = self.build_reports_sql_limit(params,
                                                           sql_params)

    # Assembling the query
    sql_query = " ".join(
        ("/* socorro.external.postgresql.report.Report.list */",
         sql_select, sql_from, sql_where, sql_order, sql_limit))

    # Query for counting the results
    sql_count_query = " ".join(
        ("/* socorro.external.postgresql.report.Report.list */",
         "SELECT count(*)", sql_from, sql_where))

    # Debug
    logger.debug(sql_count_query)
    logger.debug(cur.mogrify(sql_count_query, sql_params))

    # Querying the DB
    try:
        total = db.singleValueSql(cur, sql_count_query, sql_params)
    except db.SQLDidNotReturnSingleValue:
        # Best-effort: log and fall through with an empty result set.
        total = 0
        util.reportExceptionAndContinue(logger)

    results = []

    # No need to call Postgres if we know there will be no results
    if total != 0:
        try:
            results = db.execute(cur, sql_query, sql_params)
        except psycopg2.Error:
            util.reportExceptionAndContinue(logger)

    json_result = {"total": total, "hits": []}

    # Transforming the results into what we want
    for crash in results:
        row = dict(
            zip(("date_processed", "uptime", "user_comments", "uuid",
                 "product", "version", "build", "signature", "url",
                 "os_name", "os_version", "cpu_name", "cpu_info",
                 "address", "reason", "last_crash", "install_age",
                 "hangid", "process_type", "install_time",
                 "duplicate_of"), crash))
        # Stringify datetimes so the result is JSON-serializable.
        for i in row:
            if isinstance(row[i], datetime.datetime):
                row[i] = str(row[i])
        json_result["hits"].append(row)

    self.connection.close()

    return json_result
def test_build_query_from_params(self):
    """Test ElasticSearchBase.build_query_from_params().

    Covers: default parameters, a wildcard term search against one field,
    version filters with a release channel, and version filters with an
    empty release channel.
    """
    # Test with all default parameters
    config = self.get_dummy_context()
    params = {}
    params = scommon.get_parameters(params)
    query = ElasticSearchBase.build_query_from_params(params, config)
    self.assertTrue(query)
    # Use assertIn rather than assertTrue(x in y) for clearer failure
    # messages (shows the container on mismatch).
    self.assertIn("query", query)
    self.assertIn("size", query)
    self.assertIn("from", query)

    # Searching for a term in a specific field and with a specific product
    params = {
        "terms": "hang",
        "fields": "dump",
        "search_mode": "contains",
        "products": "fennec"
    }
    params = scommon.get_parameters(params)
    query = ElasticSearchBase.build_query_from_params(params, config)
    self.assertTrue(query)
    self.assertIn("query", query)
    self.assertIn("filtered", query["query"])

    filtered = query["query"]["filtered"]
    self.assertIn("query", filtered)
    self.assertIn("wildcard", filtered["query"])
    self.assertIn("processed_crash.dump", filtered["query"]["wildcard"])

    # "contains" search mode wraps the term in wildcards.
    dump_term = filtered["query"]["wildcard"]["processed_crash.dump"]
    self.assertEqual(dump_term, "*hang*")
    self.assertIn("filter", filtered)
    self.assertIn("and", filtered["filter"])

    # Test versions
    params = {"products": "WaterWolf", "versions": "WaterWolf:1.0a1"}
    params = scommon.get_parameters(params)
    params["versions_info"] = {
        "WaterWolf:1.0a1": {
            "product_version_id": 1,
            "version_string": "1.0a1",
            "product_name": "WaterWolf",
            "major_version": "1.0a1",
            "release_channel": "nightly-water",
            "build_id": None,
            "is_rapid_beta": False,
            "is_from_rapid_beta": False,
            "from_beta_version": "WaterWolf:1.0a1",
        }
    }
    query = ElasticSearchBase.build_query_from_params(params, config)
    filtered = query["query"]["filtered"]
    self.assertIn("and", filtered["filter"])

    # The product, version and channel must all appear in the and-filter.
    and_filter_str = json.dumps(filtered["filter"]['and'])
    self.assertIn('WaterWolf', and_filter_str)
    self.assertIn('1.0a1', and_filter_str)
    self.assertIn('nightly-water', and_filter_str)

    # Test versions with an empty release channel in versions_info
    params = {"products": "WaterWolf", "versions": "WaterWolf:2.0"}
    params = scommon.get_parameters(params)
    params['versions_info'] = {
        'WaterWolf:2.0': {
            "version_string": "2.0",
            "product_name": "WaterWolf",
            "major_version": "2.0",
            "release_channel": None,
            "build_id": None,
            "is_rapid_beta": False,
            "is_from_rapid_beta": False,
            "from_beta_version": "WaterWolf:2.0",
        }
    }
    query = ElasticSearchBase.build_query_from_params(params, config)
    filtered = query["query"]["filtered"]
    self.assertIn("and", filtered["filter"])

    and_filter_str = json.dumps(filtered["filter"]['and'])
    self.assertIn('WaterWolf', and_filter_str)
    self.assertIn('2.0', and_filter_str)
def search(self, **kwargs):
    """
    Search for crashes and return them.

    See http://socorro.readthedocs.org/en/latest/middleware.html#search

    Optional arguments: see SearchCommon.get_parameters()

    Returns {"total": <int>, "hits": [<row dict>, ...]}.
    """
    # Creating the connection to the DB
    # NOTE(review): the connection is stored on self and closed only on the
    # success path at the bottom -- an exception mid-way leaks it.
    self.connection = self.database.connection()
    cur = self.connection.cursor()

    params = search_common.get_parameters(kwargs)

    # Default mode falls back to starts_with for postgres
    if params["search_mode"] == "default":
        params["search_mode"] = "starts_with"
    if params["plugin_search_mode"] == "default":
        params["plugin_search_mode"] = "starts_with"

    # For Postgres, we never search for a list of terms
    if params["terms"]:
        params["terms"] = " ".join(params["terms"])
        params["terms"] = Search.prepare_terms(params["terms"],
                                               params["search_mode"])

    # Searching for terms in plugins
    if params["report_process"] == "plugin" and params["plugin_terms"]:
        params["plugin_terms"] = " ".join(params["plugin_terms"])
        params["plugin_terms"] = Search.prepare_terms(
            params["plugin_terms"], params["plugin_search_mode"])

    # Parsing the versions
    params["versions_string"] = params["versions"]
    (params["versions"], params["products"]) = Search.parse_versions(
        params["versions"], params["products"])

    # Changing the OS ids to OS names
    for i, elem in enumerate(params["os"]):
        for platform in self.context.platforms:
            if platform["id"] == elem:
                params["os"][i] = platform["name"]

    # Creating the parameters for the sql query
    sql_params = {
        "from_date": params["from_date"],
        "to_date": params["to_date"],
        "limit": params["result_number"],
        "offset": params["result_offset"]
    }
    # dispatch_params expands each list into numbered keys (e.g. product0,
    # product1, ...) matching the placeholders built below.
    sql_params = Search.dispatch_params(sql_params, "term",
                                        params["terms"])
    sql_params = Search.dispatch_params(sql_params, "product",
                                        params["products"])
    sql_params = Search.dispatch_params(sql_params, "os", params["os"])
    sql_params = Search.dispatch_params(sql_params, "version",
                                        params["versions"])
    sql_params = Search.dispatch_params(sql_params, "build",
                                        params["build_ids"])
    sql_params = Search.dispatch_params(sql_params, "reason",
                                        params["reasons"])
    sql_params = Search.dispatch_params(sql_params, "plugin_term",
                                        params["plugin_terms"])
    sql_params = Search.dispatch_params(sql_params, "branch",
                                        params["branches"])

    # Preparing the different parts of the sql query

    #---------------------------------------------------------------
    # SELECT
    #---------------------------------------------------------------

    sql_select = self.generate_sql_select(params)

    # Adding count for each OS
    for i in self.context.platforms:
        sql_params["os_%s" % i["id"]] = i["name"]

    #---------------------------------------------------------------
    # FROM
    #---------------------------------------------------------------

    sql_from = self.generate_sql_from(params)

    #---------------------------------------------------------------
    # WHERE
    #---------------------------------------------------------------

    sql_where = ["""
        WHERE r.date_processed BETWEEN %(from_date)s AND %(to_date)s
    """]

    ## Adding terms to where clause
    if params["terms"]:
        if params["search_mode"] == "is_exactly":
            sql_where.append("r.signature=%(term)s")
        else:
            sql_where.append("r.signature LIKE %(term)s")

    ## Adding products to where clause
    if params["products"]:
        products_list = ["r.product=%(product" + str(x) + ")s"
                         for x in range(len(params["products"]))]
        sql_where.append("(%s)" % (" OR ".join(products_list)))

    ## Adding OS to where clause
    if params["os"]:
        os_list = ["r.os_name=%(os" + str(x) + ")s"
                   for x in range(len(params["os"]))]
        sql_where.append("(%s)" % (" OR ".join(os_list)))

    ## Adding branches to where clause
    if params["branches"]:
        branches_list = ["branches.branch=%(branch" + str(x) + ")s"
                         for x in range(len(params["branches"]))]
        sql_where.append("(%s)" % (" OR ".join(branches_list)))

    ## Adding versions to where clause
    if params["versions"]:
        # Get information about the versions
        versions_service = Util(config=self.context)
        fakeparams = {
            "versions": params["versions_string"]
        }
        versions_info = versions_service.versions_info(**fakeparams)

        if isinstance(params["versions"], list):
            # versions is a flat [product, version, product, version, ...]
            # list, hence the step of 2.
            versions_where = []

            for x in range(0, len(params["versions"]), 2):
                version_where = []
                version_where.append(str(x).join(("r.product=%(version",
                                                  ")s")))

                key = "%s:%s" % (params["versions"][x],
                                 params["versions"][x + 1])
                version_where = self.generate_version_where(
                    key, params["versions"], versions_info, x, sql_params,
                    version_where)

                version_where.append(str(x + 1).join((
                    "r.version=%(version", ")s")))
                versions_where.append("(%s)" % " AND ".join(version_where))

            sql_where.append("(%s)" % " OR ".join(versions_where))
        else:
            # Original product:value
            key = "%s:%s" % (params["products"], params["versions"])

            version_where = []
            version_where = self.generate_version_where(
                key, params["versions"], versions_info, None, sql_params,
                version_where)

            version_where.append("r.version=%(version)s")
            sql_where.append("(%s)" % " AND ".join(version_where))

    ## Adding build id to where clause
    if params["build_ids"]:
        build_ids_list = ["r.build=%(build" + str(x) + ")s"
                          for x in range(len(params["build_ids"]))]
        sql_where.append("(%s)" % (" OR ".join(build_ids_list)))

    ## Adding reason to where clause
    if params["reasons"]:
        reasons_list = ["r.reason=%(reason" + str(x) + ")s"
                        for x in range(len(params["reasons"]))]
        sql_where.append("(%s)" % (" OR ".join(reasons_list)))

    if params["report_type"] == "crash":
        sql_where.append("r.hangid IS NULL")
    elif params["report_type"] == "hang":
        sql_where.append("r.hangid IS NOT NULL")

    ## Searching through plugins
    if params["report_process"] == "plugin":
        sql_where.append("r.process_type = 'plugin'")
        sql_where.append(("plugins_reports.date_processed BETWEEN "
                          "%(from_date)s AND %(to_date)s"))

        if params["plugin_terms"]:
            comp = "="

            if params["plugin_search_mode"] in ("contains", "starts_with"):
                comp = " LIKE "

            sql_where_plugin_in = []
            for f in params["plugin_in"]:
                if f == "name":
                    field = "plugins.name"
                elif f == "filename":
                    field = "plugins.filename"
                else:
                    # FIX: unknown field names used to fall through and
                    # reuse a stale `field` value (or raise NameError on
                    # the first iteration); skip them instead.
                    continue
                sql_where_plugin_in.append(comp.join((field,
                                                      "%(plugin_term)s")))

            sql_where.append("(%s)" % " OR ".join(sql_where_plugin_in))

    elif params["report_process"] == "browser":
        sql_where.append("r.process_type IS NULL")

    elif params["report_process"] == "content":
        sql_where.append("r.process_type = 'content'")

    sql_where = " AND ".join(sql_where)

    #---------------------------------------------------------------
    # GROUP BY
    #---------------------------------------------------------------

    sql_group = self.generate_sql_group(params)

    #---------------------------------------------------------------
    # ORDER BY
    #---------------------------------------------------------------

    sql_order = """
        ORDER BY total DESC
    """

    #---------------------------------------------------------------
    # LIMIT OFFSET
    #---------------------------------------------------------------

    sql_limit = """
        LIMIT %(limit)s
        OFFSET %(offset)s
    """

    # Assembling the query
    sql_from = " JOIN ".join(sql_from)
    sql_query = " ".join(("/* socorro.search.Search search */",
                          sql_select, sql_from, sql_where, sql_group,
                          sql_order, sql_limit))

    # Query for counting the results
    sql_count_query = " ".join((
        "/* socorro.external.postgresql.search.Search search.count */",
        "SELECT count(DISTINCT r.signature)", sql_from, sql_where))

    # Debug
    logger.debug(cur.mogrify(sql_query, sql_params))

    # Querying the DB
    try:
        total = db.singleValueSql(cur, sql_count_query, sql_params)
    except Exception:
        # Best-effort: log and fall through with an empty result set.
        total = 0
        util.reportExceptionAndContinue(logger)

    # No need to call Postgres if we know there will be no results
    if total != 0:
        try:
            results = db.execute(cur, sql_query, sql_params)
        except Exception:
            results = []
            util.reportExceptionAndContinue(logger)
    else:
        results = []

    json_result = {
        "total": total,
        "hits": []
    }

    # Transforming the results into what we want
    for crash in results:
        if params["report_process"] == "plugin":
            row = dict(zip(("signature", "count", "is_windows", "is_mac",
                            "is_linux", "numhang", "numplugin",
                            "numcontent", "pluginname", "pluginversion",
                            "pluginfilename"), crash))
        else:
            row = dict(zip(("signature", "count", "is_windows", "is_mac",
                            "is_linux", "numhang", "numplugin",
                            "numcontent"), crash))
        json_result["hits"].append(row)

    self.connection.close()

    return json_result
def get_list(self, **kwargs):
    """Return the list of crashes matching a given signature.

    Builds and runs two PostgreSQL queries -- one counting the matches,
    one fetching a page of them -- against the reports table joined to
    reports_duplicates. Returns ``{"total": <int>, "hits": [<dict>, ...]}``
    with datetimes stringified, or ``None`` when no signature was given.

    Optional arguments: see SearchCommon.get_parameters()
    """
    # Creating the connection to the DB
    self.connection = self.database.connection()
    cur = self.connection.cursor()

    params = search_common.get_parameters(kwargs)

    # A signature is mandatory for this service.
    if params["signature"] is None:
        return None

    # Whatever else was asked for, this service always searches for the
    # exact signature.
    params["terms"] = params["signature"]
    params["search_mode"] = "is_exactly"

    # Default mode falls back to starts_with for postgres
    if params["plugin_search_mode"] == "default":
        params["plugin_search_mode"] = "starts_with"

    # Limiting to a signature
    if params["terms"]:
        params["terms"] = self.prepare_terms(params["terms"],
                                             params["search_mode"])

    # Searching for terms in plugins
    if params["report_process"] == "plugin" and params["plugin_terms"]:
        params["plugin_terms"] = " ".join(params["plugin_terms"])
        params["plugin_terms"] = self.prepare_terms(
            params["plugin_terms"],
            params["plugin_search_mode"])

    # Get information about the versions
    util_service = Util(config=self.context)
    params["versions_info"] = util_service.versions_info(**params)

    # Parsing the versions
    params["versions_string"] = params["versions"]
    (params["versions"], params["products"]) = self.parse_versions(
        params["versions"],
        params["products"])

    # Changing the OS ids to OS names
    for i, elem in enumerate(params["os"]):
        for platform in self.context.platforms:
            if platform["id"] == elem:
                params["os"][i] = platform["name"]

    # Creating the parameters for the sql query
    sql_params = {
    }

    # Preparing the different parts of the sql query
    sql_select = """
        SELECT r.date_processed, r.uptime, r.user_comments, r.uuid,
            r.product, r.version, r.build, r.signature, r.url, r.os_name,
            r.os_version, r.cpu_name, r.cpu_info, r.address, r.reason,
            r.last_crash, r.install_age, r.hangid, r.process_type,
            (r.client_crash_date - (r.install_age * INTERVAL '1 second'))
                AS install_time, rd.duplicate_of
    """

    sql_from = self.build_reports_sql_from(params)
    # Join against reports_duplicates so every hit carries duplicate_of.
    sql_from = """%s LEFT OUTER JOIN reports_duplicates rd ON
        r.uuid = rd.uuid
    """ % sql_from

    (sql_where, sql_params) = self.build_reports_sql_where(params,
                                                           sql_params,
                                                           self.context)

    sql_order = """
        ORDER BY r.date_processed DESC
    """

    (sql_limit, sql_params) = self.build_reports_sql_limit(params,
                                                           sql_params)

    # Assembling the query
    sql_query = " ".join((
        "/* socorro.external.postgresql.report.Report.list */",
        sql_select, sql_from, sql_where, sql_order, sql_limit))

    # Query for counting the results
    sql_count_query = " ".join((
        "/* socorro.external.postgresql.report.Report.list */",
        "SELECT count(*)", sql_from, sql_where))

    # Debug
    logger.debug(sql_count_query)
    logger.debug(cur.mogrify(sql_count_query, sql_params))

    # Querying the DB
    try:
        total = db.singleValueSql(cur, sql_count_query, sql_params)
    except db.SQLDidNotReturnSingleValue:
        # Best-effort: treat a failed count as "no results" and log it.
        total = 0
        util.reportExceptionAndContinue(logger)

    results = []

    # No need to call Postgres if we know there will be no results
    if total != 0:
        try:
            results = db.execute(cur, sql_query, sql_params)
        except psycopg2.Error:
            # Best-effort: return an empty hit list on query failure.
            util.reportExceptionAndContinue(logger)

    json_result = {
        "total": total,
        "hits": []
    }

    # Transforming the results into what we want
    for crash in results:
        # Column order here must mirror sql_select above.
        row = dict(zip((
            "date_processed",
            "uptime",
            "user_comments",
            "uuid",
            "product",
            "version",
            "build",
            "signature",
            "url",
            "os_name",
            "os_version",
            "cpu_name",
            "cpu_info",
            "address",
            "reason",
            "last_crash",
            "install_age",
            "hangid",
            "process_type",
            "install_time",
            "duplicate_of"), crash))
        # Datetimes are not JSON-serializable; stringify them.
        for i in row:
            if isinstance(row[i], datetime.datetime):
                row[i] = str(row[i])
        json_result["hits"].append(row)

    self.connection.close()

    return json_result
def get_list(self, **kwargs):
    """List the crashes for one signature, straight from ElasticSearch.

    Optional arguments: see SearchCommon.get_parameters()

    Returns ``None`` when no signature is given, otherwise a dict of the
    form ``{"total": <int>, "hits": [<filtered crash>, ...]}``.
    """
    params = search_common.get_parameters(kwargs)

    # The generic search API has no "signature" parameter, so pick it up
    # from the raw kwargs and normalize it to a one-element list.
    params["signature"] = kwargs.get("signature")
    if params["signature"] is None:
        return None
    if not isinstance(params["signature"], list):
        params["signature"] = [params["signature"]]

    # When working with full days, drop the time part of the date bounds.
    if params["use_full_days"]:
        for bound in ("from_date", "to_date"):
            params[bound] = params[bound].date()

    # Resolve version metadata for the query builder.
    params["versions_info"] = VersionsInfo(self.context).versions_info(params)

    # Whatever the source was, we always search for an exact signature.
    params["terms"] = params["signature"]
    params["search_mode"] = "is_exactly"

    json_query = json.dumps(self.build_query_from_params(params))
    logger.debug("Query the crashes or signatures: %s", json_query)

    raw_response = self.query(params["from_date"], params["to_date"],
                              json_query)
    es_data = json.loads(raw_response[0])

    total = es_data["hits"]["total"] if es_data else 0
    raw_hits = es_data["hits"]["hits"] if es_data else []

    # Only this subset of each stored document is returned to the caller.
    wanted = [
        "date_processed",
        "uptime",
        "user_comments",
        "uuid",
        "product",
        "version",
        "build",
        "signature",
        "url",
        "os_name",
        "os_version",
        "cpu_name",
        "cpu_info",
        "address",
        "reason",
        "last_crash",
        "install_age",
        "hangid",
        "process_type",
        "client_crash_date"
    ]

    filtered_hits = []
    for raw_hit in raw_hits:
        source = raw_hit["_source"]
        hit = dict((key, source[key]) for key in source if key in wanted)

        # Derive install_time from the client crash date minus the
        # install age (seconds).
        crash_moment = datetimeutil.string_to_datetime(
            hit["client_crash_date"])
        age = datetime.timedelta(0, hit["install_age"])
        hit["install_time"] = (crash_moment - age).strftime(
            "%Y-%m-%d %H:%M:%S.%f")
        filtered_hits.append(hit)

    return {
        "total": total,
        "hits": filtered_hits
    }
def get_list(self, **kwargs):
    """Return the list of crashes matching a given signature.

    Both `from_date` and `to_date` (and their aliases `from` and `to`)
    are required and can not be greater than 30 days apart.

    Optional arguments: see SearchCommon.get_parameters()

    Returns ``{"hits": [<crash dict>, ...], "total": <int>}``.

    Raises:
        MissingArgumentError: if from_date, to_date or signature is absent.
        BadArgumentError: if the date span exceeds 30 days or the sort key
            is not recognized.
    """
    # aliases
    if "from" in kwargs and "from_date" not in kwargs:
        kwargs["from_date"] = kwargs.get("from")
    if "to" in kwargs and "to_date" not in kwargs:
        kwargs["to_date"] = kwargs.get("to")

    if not kwargs.get('from_date'):
        raise MissingArgumentError('from_date')
    if not kwargs.get('to_date'):
        raise MissingArgumentError('to_date')

    # Dates are mandatory and bounded so the query stays cheap.
    from_date = datetimeutil.datetimeFromISOdateString(kwargs['from_date'])
    to_date = datetimeutil.datetimeFromISOdateString(kwargs['to_date'])
    span_days = (to_date - from_date).days
    if span_days > 30:
        raise BadArgumentError(
            'Span between from_date and to_date can not be more than 30'
        )

    # Sort key defaults to date_processed; direction defaults to ASC and
    # only becomes DESC when `reverse` is passed and truthy.  (The
    # original initialized 'direction' to 'DESC' and then unconditionally
    # overwrote it with 'ASC' -- that dead store is removed here.)
    sort_order = {
        'key': 'date_processed',
        'direction': 'ASC'
    }
    if 'sort' in kwargs:
        sort_order['key'] = kwargs.pop('sort')
        _recognized_sort_orders = (
            'date_processed',
            'uptime',
            'user_comments',
            'uuid',
            'uuid_text',
            'product',
            'version',
            'build',
            'signature',
            'url',
            'os_name',
            'os_version',
            'cpu_name',
            'cpu_info',
            'address',
            'reason',
            'last_crash',
            'install_age',
            'hangid',
            'process_type',
            'release_channel',
            'install_time',
            'duplicate_of',
        )
        # The key is interpolated straight into the ORDER BY clause below,
        # so it must be whitelisted to prevent SQL injection.
        if sort_order['key'] not in _recognized_sort_orders:
            raise BadArgumentError(
                '%s is not a recognized sort order key' % sort_order['key']
            )
    if 'reverse' in kwargs:
        if kwargs.pop('reverse'):
            sort_order['direction'] = 'DESC'

    include_raw_crash = kwargs.get('include_raw_crash') or False
    params = search_common.get_parameters(kwargs)
    if not params["signature"]:
        raise MissingArgumentError('signature')

    # Whatever else was asked for, this service always searches for the
    # exact signature.
    params["terms"] = params["signature"]
    params["search_mode"] = "is_exactly"

    # Default mode falls back to starts_with for postgres
    if params["plugin_search_mode"] == "default":
        params["plugin_search_mode"] = "starts_with"

    # Limiting to a signature
    if params["terms"]:
        params["terms"] = self.prepare_terms(params["terms"],
                                             params["search_mode"])

    # Searching for terms in plugins
    if params["report_process"] == "plugin" and params["plugin_terms"]:
        params["plugin_terms"] = " ".join(params["plugin_terms"])
        params["plugin_terms"] = self.prepare_terms(
            params["plugin_terms"],
            params["plugin_search_mode"]
        )

    # Get information about the versions
    util_service = Util(config=self.context)
    params["versions_info"] = util_service.versions_info(**params)

    # Parsing the versions
    params["versions_string"] = params["versions"]
    (params["versions"], params["products"]) = self.parse_versions(
        params["versions"],
        params["products"]
    )

    # Support both new-style (config.webapi) and old middleware config.
    if hasattr(self.context, 'webapi'):
        context = self.context.webapi
    else:
        # old middleware
        context = self.context

    # Changing the OS ids to OS names
    for i, elem in enumerate(params["os"]):
        for platform in context.platforms:
            if platform["id"][:3] == elem[:3]:
                params["os"][i] = platform["name"]

    # Creating the parameters for the sql query
    sql_params = {
    }

    # Preparing the different parts of the sql query
    sql_select = """
        SELECT
            r.date_processed,
            r.uptime,
            r.user_comments,
            r.uuid::uuid,
            r.uuid as uuid_text,
            r.product,
            r.version,
            r.build,
            r.signature,
            r.url,
            r.os_name,
            r.os_version,
            r.cpu_name,
            r.cpu_info,
            r.address,
            r.reason,
            r.last_crash,
            r.install_age,
            r.hangid,
            r.process_type,
            r.release_channel,
            (r.client_crash_date - (r.install_age * INTERVAL '1 second'))
                AS install_time
    """
    # duplicate_of comes from the dupes CTE in wrapped_select when the raw
    # crash is included, so only select it directly otherwise.  (Original
    # had "if include_raw_crash: pass / else: ..." -- dead branch folded.)
    if not include_raw_crash:
        sql_select += """
            , rd.duplicate_of
        """

    # Outer query used only with include_raw_crash: wraps the base query
    # in a CTE and joins duplicates and the raw crash JSON.  The doubled
    # %% survives the "wrapped_select % sql_query" interpolation below.
    wrapped_select = """
        WITH report_slice AS (
            %s
        ), dupes AS (
            SELECT
                report_slice.uuid,
                rd.duplicate_of
            FROM reports_duplicates rd
            JOIN report_slice ON report_slice.uuid_text = rd.uuid
            WHERE
                rd.date_processed BETWEEN %%(from_date)s AND %%(to_date)s
        )

        SELECT
            rs.*,
            dupes.duplicate_of,
            rc.raw_crash
        FROM report_slice rs
        LEFT OUTER JOIN dupes USING (uuid)
        LEFT OUTER JOIN raw_crashes rc ON
            rs.uuid = rc.uuid
            AND
            rc.date_processed BETWEEN %%(from_date)s AND %%(to_date)s
    """

    sql_from = self.build_reports_sql_from(params)
    if not include_raw_crash:
        sql_from = """%s
            LEFT OUTER JOIN reports_duplicates rd ON r.uuid = rd.uuid
        """ % sql_from

    sql_where, sql_params = self.build_reports_sql_where(
        params,
        sql_params,
        self.context
    )

    # sort_order was validated against the whitelist above, so direct
    # string interpolation is safe here.
    sql_order = """
        ORDER BY %(key)s %(direction)s
    """ % sort_order

    sql_limit, sql_params = self.build_reports_sql_limit(
        params,
        sql_params
    )

    # Assembling the query.  (The original duplicated this identical join
    # in both branches of an if/else on include_raw_crash; a single
    # assignment is equivalent.)
    sql_query = "\n".join((
        "/* socorro.external.postgresql.report.Report.list */",
        sql_select, sql_from, sql_where, sql_order, sql_limit)
    )

    # Query for counting the results
    sql_count_query = "\n".join((
        "/* socorro.external.postgresql.report.Report.list */",
        "SELECT count(*)", sql_from, sql_where)
    )

    # Querying the DB
    with self.get_connection() as connection:

        total = self.count(
            sql_count_query,
            sql_params,
            error_message="Failed to count crashes from reports.",
            connection=connection
        )

        # No need to call Postgres if we know there will be no results
        if total:
            if include_raw_crash:
                sql_query = wrapped_select % sql_query
            results = self.query(
                sql_query,
                sql_params,
                error_message="Failed to retrieve crashes from reports",
                connection=connection
            )
        else:
            results = []

    # Transforming the results into what we want
    # Column order must mirror sql_select above.
    fields = (
        "date_processed",
        "uptime",
        "user_comments",
        "uuid",
        "uuid",  # the uuid::text one; dict(zip(...)) lets this second
                 # (text) value win for the single "uuid" key
        "product",
        "version",
        "build",
        "signature",
        "url",
        "os_name",
        "os_version",
        "cpu_name",
        "cpu_info",
        "address",
        "reason",
        "last_crash",
        "install_age",
        "hangid",
        "process_type",
        "release_channel",
        "install_time",
        "duplicate_of",
    )
    if include_raw_crash:
        fields += ("raw_crash",)
    crashes = []
    for row in results:
        crash = dict(zip(fields, row))
        # Stringify dates for JSON; non-dates raise TypeError and are
        # left untouched.
        for i in crash:
            try:
                crash[i] = datetimeutil.date_to_string(crash[i])
            except TypeError:
                pass
        crashes.append(crash)

    return {
        "hits": crashes,
        "total": total
    }
def get_list(self, **kwargs):
    """List crashes for a given signature out of ElasticSearch.

    Optional arguments: see SearchCommon.get_parameters()

    Returns ``None`` if no signature was supplied, otherwise
    ``{"total": <int>, "hits": [<filtered crash>, ...]}``.
    """
    params = search_common.get_parameters(kwargs)

    # Search does not have a signature parameter, so we handle this one
    # separately and make sure it's a list of one string only.
    params["signature"] = kwargs.get("signature")
    if params["signature"] is None:
        return None
    if not isinstance(params["signature"], list):
        params["signature"] = [params["signature"]]

    # Get information about the versions
    params["versions_info"] = VersionsInfo(self.context).versions_info(
        params)

    # Whatever the source was, we always search for an exact signature
    params["terms"] = params["signature"]
    params["search_mode"] = "is_exactly"

    serialized_query = json.dumps(
        self.build_query_from_params(params, self.context))
    logger.debug("Query the crashes or signatures: %s", serialized_query)

    response = self.query(params["from_date"], params["to_date"],
                          serialized_query)
    payload = json.loads(response[0])

    if not payload:
        return {"total": 0, "hits": []}

    # Only these fields of each stored document are handed back.
    wanted_fields = frozenset([
        "date_processed", "uptime", "user_comments", "uuid", "product",
        "version", "build", "signature", "url", "os_name", "os_version",
        "cpu_name", "cpu_info", "address", "reason", "last_crash",
        "install_age", "hangid", "process_type", "client_crash_date"
    ])

    def _shape(document):
        # Keep only whitelisted fields and add a computed install_time.
        shaped = dict((name, value)
                      for name, value in document["_source"].items()
                      if name in wanted_fields)
        crashed_at = datetimeutil.string_to_datetime(
            shaped["client_crash_date"])
        installed_for = datetime.timedelta(0, shaped["install_age"])
        shaped["install_time"] = (crashed_at - installed_for).strftime(
            "%Y-%m-%d %H:%M:%S.%f")
        return shaped

    return {
        "total": payload["hits"]["total"],
        "hits": [_shape(doc) for doc in payload["hits"]["hits"]]
    }
def test_build_reports_sql_where(self):
    """Test PostgreSQLBase.build_reports_sql_where().

    Exercises the WHERE-clause builder one filter at a time: defaults,
    terms/search_mode, products, os, branches, build_ids, reasons,
    report_type, versions (with versions_info metadata) and the
    plugin-related filters.  Each test resets the previous test's
    parameter to its default before setting its own.
    """
    config = self.get_dummy_context()
    pgbase = self.get_instance()
    params = search_common.get_parameters({})  # Get default search params
    default_params = util.DotDict(params.copy())
    sql_params = {}

    # .....................................................................
    # Test 1: default values for parameters
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 2: terms and search_mode = is_exactly
    sql_params = {}
    params.terms = "signature"
    params.search_mode = "is_exactly"
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND r.signature=%(term)s"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date,
        "term": params.terms
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 3: terms and search_mode != is_exactly
    sql_params = {}
    params.terms = "signature%"
    params.search_mode = "starts_with"
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND r.signature LIKE %(term)s"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date,
        "term": params.terms
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 4: products
    sql_params = {}
    params.terms = default_params.terms
    params.search_mode = default_params.search_mode
    params.products = ["Firefox", "Fennec"]
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND (r.product=%(product0)s OR " \
              "r.product=%(product1)s)"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date,
        "product0": "Firefox",
        "product1": "Fennec"
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 5: os
    sql_params = {}
    params.products = default_params.products
    params.os = ["Windows"]
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND (r.os_name=%(os0)s)"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date,
        "os0": "Windows"
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 6: branches
    sql_params = {}
    params.os = default_params.os
    params.branches = ["2.2", "2.3", "4.0"]
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND (branches.branch=%(branch0)s OR " \
              "branches.branch=%(branch1)s OR branches.branch=%(branch2)s)"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date,
        "branch0": "2.2",
        "branch1": "2.3",
        "branch2": "4.0"
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 7: build_ids
    sql_params = {}
    params.branches = default_params.branches
    params.build_ids = ["20120101123456"]
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND (r.build=%(build0)s)"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date,
        "build0": "20120101123456"
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 8: reasons
    sql_params = {}
    params.build_ids = default_params.build_ids
    params.reasons = ["EXCEPTION", "OVERFLOW"]
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND (r.reason=%(reason0)s OR " \
              "r.reason=%(reason1)s)"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date,
        "reason0": "EXCEPTION",
        "reason1": "OVERFLOW"
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 9: report_type
    sql_params = {}
    params.reasons = default_params.reasons
    params.report_type = "crash"
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND r.hangid IS NULL"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 10: versions
    sql_params = {}
    params.report_type = default_params.report_type
    params.versions = ["Firefox", "12.0a1", "Fennec", "11.0",
                       "Firefox", "13.0(beta)"]
    params.versions_info = {
        "Firefox:12.0a1": {
            "version_string": "12.0a1",
            "product_name": "Firefox",
            "major_version": "12.0",
            "release_channel": "Nightly",
            "build_id": ["20120101123456"]
        },
        "Fennec:11.0": {
            "version_string": "11.0",
            "product_name": "Fennec",
            "major_version": None,
            "release_channel": None,
            "build_id": None
        },
        "Firefox:13.0(beta)": {
            "version_string": "13.0(beta)",
            "product_name": "Firefox",
            "major_version": "13.0",
            "release_channel": "Beta",
            "build_id": ["20120101123456", "20120101098765"]
        }
    }
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND ((r.product=%(version0)s AND " \
              "r.release_channel ILIKE 'nightly' AND " \
              "r.version=%(version1)s) OR (r.product=%(version2)s AND " \
              "r.version=%(version3)s) OR (r.product=%(version4)s AND " \
              "r.release_channel ILIKE 'beta' AND r.build IN " \
              "('20120101123456', '20120101098765') AND " \
              "r.version=%(version5)s))"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date,
        "version0": "Firefox",
        "version1": "12.0",
        "version2": "Fennec",
        "version3": "11.0",
        "version4": "Firefox",
        "version5": "13.0"
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 11: report_process = plugin
    sql_params = {}
    params.versions = default_params.versions
    # Reset the version metadata from Test 10.  (The original assigned to
    # a misspelled "versions_infos" attribute, leaving the real
    # versions_info set and creating a stray attribute.)
    params.versions_info = None
    params.report_process = "plugin"
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND r.process_type = 'plugin' AND " \
              "plugins_reports.date_processed BETWEEN " \
              "%(from_date)s AND %(to_date)s"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 12: report_process != plugin
    sql_params = {}
    params.report_process = "content"
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND r.process_type = 'content'"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)

    # .....................................................................
    # Test 13: plugins
    sql_params = {}
    params.report_process = "plugin"
    params.plugin_terms = "plugin_name"
    params.plugin_search_mode = "is_exactly"
    params.plugin_in = ["name"]
    sql_exp = "WHERE r.date_processed BETWEEN %(from_date)s AND " \
              "%(to_date)s AND r.process_type = 'plugin' AND " \
              "plugins_reports.date_processed BETWEEN " \
              "%(from_date)s AND %(to_date)s AND " \
              "(plugins.name=%(plugin_term)s)"
    sql_params_exp = {
        "from_date": params.from_date,
        "to_date": params.to_date,
        "plugin_term": params.plugin_terms
    }

    (sql, sql_params) = pgbase.build_reports_sql_where(params, sql_params,
                                                       config)
    sql = " ".join(sql.split())  # squeeze all \s, \r, \t...

    self.assertEqual(sql, sql_exp)
    self.assertEqual(sql_params, sql_params_exp)
def get(self, **kwargs): """ Search for crashes and return them. See http://socorro.readthedocs.org/en/latest/middleware.html#search Optional arguments: see SearchCommon.get_parameters() """ # Creating the connection to the DB self.connection = self.database.connection() cur = self.connection.cursor() params = search_common.get_parameters(kwargs) # change aliases from the web to the implementation's need if "for" in params and "terms" not in params: params["terms"] = params.get("for") if "from" in params and "from_date" not in params: params["from_date"] = params.get("from") if "to" in params and "to_date" not in params: params["to_date"] = params.get("to") if "in" in params and "fields" not in params: params["fields"] = params.get("in") # Default mode falls back to starts_with for postgres if params["search_mode"] == "default": params["search_mode"] = "starts_with" if params["plugin_search_mode"] == "default": params["plugin_search_mode"] = "starts_with" # For Postgres, we never search for a list of terms if params["terms"]: params["terms"] = " ".join(params["terms"]) params["terms"] = Search.prepare_terms(params["terms"], params["search_mode"]) # Searching for terms in plugins if params["report_process"] == "plugin" and params["plugin_terms"]: params["plugin_terms"] = " ".join(params["plugin_terms"]) params["plugin_terms"] = Search.prepare_terms( params["plugin_terms"], params["plugin_search_mode"]) # Get information about the versions util_service = Util(config=self.context) params["versions_info"] = util_service.versions_info(**params) # Parsing the versions params["versions_string"] = params["versions"] (params["versions"], params["products"]) = Search.parse_versions( params["versions"], params["products"]) if hasattr(self.context, 'webapi'): context = self.context.webapi else: # old middleware context = self.context # Changing the OS ids to OS names for i, elem in enumerate(params["os"]): for platform in context.platforms: if platform["id"] == elem: 
params["os"][i] = platform["name"] # Creating the parameters for the sql query sql_params = { } # Preparing the different parts of the sql query sql_select = self.generate_sql_select(params) # Adding count for each OS for i in context.platforms: sql_params["os_%s" % i["id"]] = i["name"] sql_from = self.build_reports_sql_from(params) (sql_where, sql_params) = self.build_reports_sql_where(params, sql_params, self.context) sql_group = self.generate_sql_group(params) sql_order = """ ORDER BY total DESC, signature """ (sql_limit, sql_params) = self.build_reports_sql_limit(params, sql_params) # Assembling the query sql_query = " ".join(("/* socorro.search.Search search */", sql_select, sql_from, sql_where, sql_group, sql_order, sql_limit)) # Query for counting the results sql_count_query = " ".join(( "/* socorro.external.postgresql.search.Search search.count */", "SELECT count(DISTINCT r.signature)", sql_from, sql_where)) # Debug logger.debug(cur.mogrify(sql_query, sql_params)) # Querying the DB try: total = db.singleValueSql(cur, sql_count_query, sql_params) except db.SQLDidNotReturnSingleValue: total = 0 util.reportExceptionAndContinue(logger) results = [] # No need to call Postgres if we know there will be no results if total != 0: try: results = db.execute(cur, sql_query, sql_params) except psycopg2.Error: util.reportExceptionAndContinue(logger) json_result = { "total": total, "hits": [] } # Transforming the results into what we want for crash in results: if params["report_process"] == "plugin": row = dict(zip(("signature", "count", "is_windows", "is_mac", "is_linux", "numhang", "numplugin", "numcontent", "pluginname", "pluginversion", "pluginfilename"), crash)) else: row = dict(zip(("signature", "count", "is_windows", "is_mac", "is_linux", "numhang", "numplugin", "numcontent"), crash)) json_result["hits"].append(row) self.connection.close() return json_result