def get_bucket(self, threshold, bucket_id, project=None, from_=None, size=None):
    """
    Return information for the given bucket.

    Arguments:
        threshold: crash-similarity threshold; coerced to a Threshold.
        bucket_id: identifier of the bucket to fetch.
        project: if given, attached to the returned Bucket (not used to
            filter the query here).
        from_, size: optional pagination of the reports within the bucket.

    Returns a Bucket whose top_reports are the matching reports, newest
    first.  Raises BucketNotFoundError when no report matches.
    """
    # Coerce to a Threshold object.
    threshold = Threshold(threshold)

    query = {
        "query": {
            "constant_score": {
                "filter": {
                    "term": {
                        "buckets." + threshold.to_elasticsearch(): bucket_id
                    }
                }
            }
        },
        # Newest reports first.
        "sort": {"date": {"order": "desc"}},
    }

    # Apply each pagination parameter independently.  Previously `size`
    # was only honoured when `from_` was also given, and a null size
    # could be sent to ElasticSearch.
    if from_ is not None:
        query["from"] = from_
    if size is not None:
        query["size"] = size

    response = self.es.search(body=query, index=self.es_index)

    # Dump the raw response for debugging.  The file must be opened in
    # text mode: print() writes str, and mode 'wb' raised a TypeError.
    with open('bucket_response', 'w') as debug_file:
        print(json.dumps(response, indent=2), file=debug_file)

    reports_found = response['hits']['total']

    # Since no reports were found, assume the bucket does not exist (at
    # least for this project).
    if reports_found < 1:
        raise BucketNotFoundError(bucket_id)

    reports = get_reports_by_bucket(response, threshold).get(bucket_id)
    assert reports

    return Bucket(id=bucket_id, project=project, threshold=threshold,
                  total=reports_found, top_reports=reports,
                  first_seen=None)
def get_bucket(self, threshold, bucket_id, project=None, from_=None, size=None):
    """
    Return information for the given bucket.

    Arguments:
        threshold: crash-similarity threshold; coerced to a Threshold.
        bucket_id: identifier of the bucket to fetch.
        project: if given, attached to the returned Bucket (not used to
            filter the query here).
        from_, size: optional pagination of the reports within the bucket.

    Returns a Bucket whose top_reports are the matching reports, newest
    first.  Raises BucketNotFoundError when no report matches.
    """
    # Coerce to a Threshold object.
    threshold = Threshold(threshold)

    query = {
        "filter": {
            "term": {
                "buckets." + threshold.to_elasticsearch(): bucket_id
            }
        },
        # Newest reports first.
        "sort": {"date": {"order": "desc"}},
    }

    # Apply each pagination parameter independently.  Previously `size`
    # was only honoured when `from_` was also given, and a null size
    # could be sent to ElasticSearch.
    if from_ is not None:
        query["from"] = from_
    if size is not None:
        query["size"] = size

    # Use the configured index rather than a hard-coded 'crashes',
    # consistent with the other search methods.
    response = self.es.search(body=query, index=self.es_index)

    reports_found = response['hits']['total']

    # Since no reports were found, assume the bucket does not exist (at
    # least for this project).
    if reports_found < 1:
        raise BucketNotFoundError(bucket_id)

    reports = get_reports_by_bucket(response, threshold).get(bucket_id)
    assert reports

    return Bucket(id=bucket_id, project=project, threshold=threshold,
                  total=reports_found, top_reports=reports,
                  first_seen=None)
def top_buckets(self, lower_bound, threshold=None, project=None, from_=None,
                size=None):
    """
    Given a datetime lower_bound (from date), calculates the top buckets
    in the given timeframe for the given threshold (automatically
    determined if not given).  The results can be tailored for a specific
    project if needed.

    Arguments:
        lower_bound: datetime; only crashes strictly after this date count.
        threshold: similarity threshold; defaults to self.default_threshold.
        project: optionally restrict to one project.
        from_, size: pagination over the ranked buckets (size defaults to 10).

    Returns a list of Bucket instances ordered by descending crash count;
    top_reports is not populated.  Raises TypeError for a non-datetime
    lower bound.
    """
    if not isinstance(lower_bound, datetime):
        raise TypeError('The lower bound MUST be a datetime object.')

    # Get the default threshold.
    if threshold is None:
        threshold = self.default_threshold
    if not isinstance(threshold, Threshold):
        threshold = Threshold(threshold)

    # Filters by lower-bound by default;
    filters = [{"range": {"date": {"gt": lower_bound.isoformat()}}}]

    # May filter optionally by project name.
    if project is not None:
        filters.append({"term": {"project": project}})

    # Oh, ElasticSearch! You and your verbose query "syntax"!
    query = {
        # Read this inside out:
        "aggs": {
            "top_buckets_filtered": {
                # Filter the top buckets by date, and maybe by project.
                "filter": {"bool": {"must": filters}},
                # Get the top buckets in descending order of size.
                "aggs": {
                    "top_buckets": {
                        "terms": {
                            "field": "buckets." + threshold.to_elasticsearch(),
                            "order": {"_count": "desc"},
                        },
                        # Get the date of the earliest crash per bucket
                        # ("min" => first seen, not latest).
                        "aggs": {
                            "first_seen": {"min": {"field": "date"}}
                        }
                    }
                }
            }
        },
        # Do not send any hits back!
        "size": 0
    }

    if size is None:
        size = 10
    # size is never None past this point; the old `if size is not None`
    # guard was dead code.
    assert size >= 0

    # The terms aggregation has no native offset, so over-fetch and
    # slice off the first `from_` buckets locally.
    actual_size = size
    if from_ is not None:
        assert from_ >= 0
        actual_size += from_

    (query["aggs"]["top_buckets_filtered"]["aggs"]
          ["top_buckets"]["terms"]["size"]) = actual_size

    try:
        # Use the configured index rather than a hard-coded 'crashes',
        # consistent with the other search methods.
        response = self.es.search(body=query, index=self.es_index)
    except RequestError as e:
        print(e.error, file=sys.stderr)
        # Bare raise re-raises the active exception cleanly.
        raise

    # Oh, ElasticSearch! You and your verbose responses!
    top_buckets = (response['aggregations']
                   ['top_buckets_filtered']
                   ['top_buckets']
                   ['buckets'])
    if from_ is not None:
        top_buckets = top_buckets[from_:]

    return [Bucket(id=bucket['key'], project=project, threshold=threshold,
                   total=bucket['doc_count'],
                   first_seen=bucket['first_seen']['value_as_string'],
                   top_reports=None)
            for bucket in top_buckets]
def top_buckets(self, lower_bound, threshold=None, project=None, from_=None,
                size=None, upper_bound=None, query_string=None):
    """
    Given a datetime lower_bound (from date), calculates the top buckets
    in the given timeframe for the given threshold (automatically
    determined if not given).  The results can be tailored for a specific
    project if needed.

    Arguments:
        lower_bound: datetime; only crashes strictly after this date count.
        threshold: similarity threshold; defaults to self.default_threshold.
        project: optionally restrict to one project.
        from_, size: pagination over the ranked buckets (size defaults to 10).
        upper_bound: optional datetime exclusive upper limit on the date.
        query_string: optional ElasticSearch query_string filter
            (NOTE: this doesn't work on ES 2.3!).

    Returns a list of Bucket instances ordered by descending crash count;
    top_reports is not populated.  Raises TypeError for a non-datetime
    lower bound.
    """
    if not isinstance(lower_bound, datetime):
        raise TypeError('The lower bound MUST be a datetime object.')

    # Get the default threshold.
    if threshold is None:
        threshold = self.default_threshold
    if not isinstance(threshold, Threshold):
        threshold = Threshold(threshold)

    # Filters by lower-bound by default;
    filters = [{"range": {"date": {"gt": lower_bound.isoformat()}}}]
    if upper_bound is not None:
        filters[0]["range"]["date"]["lt"] = upper_bound.isoformat()

    # May filter optionally by project name.
    if project is not None:
        filters.append({"term": {"project": project}})

    # this doesn't work on ES 2.3!
    if query_string is not None:
        filters.append({
            "query": {
                "query_string": {
                    "query": query_string,
                    "default_operator": "AND",
                }
            }
        })

    # Oh, ElasticSearch! You and your verbose query "syntax"!
    query = {
        # Read this inside out:
        "aggs": {
            "top_buckets_filtered": {
                # Filter the top buckets by date, and maybe by project.
                "filter": {"bool": {"must": filters}},
                # Get the top buckets in descending order of size.
                "aggs": {
                    "top_buckets": {
                        "terms": {
                            "field": "buckets." + threshold.to_elasticsearch(),
                            "order": {"_count": "desc"},
                        },
                        # Get the date of the earliest crash per bucket
                        # ("min" => first seen, not latest).
                        "aggs": {
                            "first_seen": {"min": {"field": "date"}}
                        }
                    }
                }
            }
        },
        # Do not send any hits back!
        "size": 0
    }

    if size is None:
        size = 10
    # size is never None past this point; the old `if size is not None`
    # guard was dead code.
    assert size >= 0

    # The terms aggregation has no native offset, so over-fetch and
    # slice off the first `from_` buckets locally.
    actual_size = size
    if from_ is not None:
        assert from_ >= 0
        actual_size += from_

    (query["aggs"]["top_buckets_filtered"]["aggs"]["top_buckets"]
          ["terms"]["size"]) = actual_size

    try:
        response = self.es.search(body=query, index=self.es_index)
    except RequestError as e:
        print(e.error, file=sys.stderr)
        # Bare raise re-raises the active exception cleanly.
        raise

    # Oh, ElasticSearch! You and your verbose responses!
    top_buckets = (response['aggregations']['top_buckets_filtered']
                   ['top_buckets']['buckets'])
    if from_ is not None:
        top_buckets = top_buckets[from_:]

    return [
        Bucket(id=bucket['key'], project=project, threshold=threshold,
               total=bucket['doc_count'],
               first_seen=bucket['first_seen']['value_as_string'],
               top_reports=None)
        for bucket in top_buckets
    ]
def get_bucket(self, threshold, bucket_id, project=None, from_=None, size=None):
    """
    Return information for the given bucket.

    Arguments:
        threshold: crash-similarity threshold; coerced to a Threshold.
        bucket_id: identifier of the bucket to fetch.
        project: if given, attached to the returned Bucket (not used to
            filter the query here).
        from_, size: optional pagination of the reports within the bucket.

    Returns a Bucket whose top_reports are the matching reports, newest
    first.  Raises BucketNotFoundError when no report matches.
    """
    # Coerce to a Threshold object.
    threshold = Threshold(threshold)

    query = {
        "query": {
            "constant_score": {
                "filter": {
                    "term": {
                        "buckets." + threshold.to_elasticsearch(): bucket_id
                    }
                }
            }
        },
        # Newest reports first.
        "sort": {"date": {"order": "desc"}},
    }

    # Apply each pagination parameter independently.  Previously `size`
    # was only honoured when `from_` was also given, and a null size
    # could be sent to ElasticSearch.
    if from_ is not None:
        query["from"] = from_
    if size is not None:
        query["size"] = size

    response = self.es.search(body=query, index=self.es_index)

    # Dump the raw response for debugging.  The file must be opened in
    # text mode: print() writes str, and mode 'wb' raised a TypeError.
    with open('bucket_response', 'w') as debug_file:
        print(json.dumps(response, indent=2), file=debug_file)

    reports_found = response['hits']['total']

    # Since no reports were found, assume the bucket does not exist (at
    # least for this project).
    if reports_found < 1:
        raise BucketNotFoundError(bucket_id)

    reports = get_reports_by_bucket(response, threshold).get(bucket_id)
    assert reports

    return Bucket(id=bucket_id, project=project, threshold=threshold,
                  total=reports_found, top_reports=reports,
                  first_seen=None)