예제 #1
0
    def get_bucket(self, threshold, bucket_id,
                   project=None, from_=None, size=None):
        """
        Returns information for the given bucket.

        Searches ``self.es_index`` for the reports whose bucket at the given
        threshold is ``bucket_id``, sorted by date, newest first.

        :param threshold: a Threshold, or any value ``Threshold()`` accepts.
        :param bucket_id: ID of the bucket to look up.
        :param project: stored on the returned Bucket as-is; it is NOT used
            to filter the search.
        :param from_: optional offset of the first report to return.
        :param size: optional maximum number of reports to return.
        :returns: a Bucket whose ``total`` is the number of matching reports
            and whose ``top_reports`` are the reports of the requested page.
        :raises BucketNotFoundError: if no report belongs to the bucket.
        """
        # Coerce to a Threshold object.
        threshold = Threshold(threshold)

        bucket_field = "buckets." + threshold.to_elasticsearch()
        query = {
            "query": {
                "constant_score": {
                    "filter": {"term": {bucket_field: bucket_id}}
                }
            },
            "sort": {"date": {"order": "desc"}},
        }

        # Send each paging key only when it was actually given: a lone `size`
        # must not be dropped, and a missing `size` must not be sent as null.
        if from_ is not None:
            query["from"] = from_
        if size is not None:
            query["size"] = size

        response = self.es.search(body=query, index=self.es_index)

        # Debugging aid: dump the raw response into the working directory.
        # Opened in text mode because json.dumps() returns a str.
        with open('bucket_response', 'w') as debug_file:
            print(json.dumps(response, indent=2), file=debug_file)

        reports_found = response['hits']['total']

        # Since no reports were found, assume the bucket does not exist (at
        # least for this project).
        if reports_found < 1:
            raise BucketNotFoundError(bucket_id)

        reports = get_reports_by_bucket(response, threshold).get(bucket_id)
        # NOTE(review): presumably this trips when `from_` points past the
        # last report (total > 0 but an empty page) -- confirm whether that
        # case deserves its own error.
        assert reports

        return Bucket(id=bucket_id,
                      project=project,
                      threshold=threshold,
                      total=reports_found,
                      top_reports=reports,
                      first_seen=None)
예제 #2
0
    def get_bucket(self, threshold, bucket_id,
                   project=None, from_=None, size=None):
        """
        Returns information for the given bucket.

        Searches the 'crashes' index for the reports whose bucket at the
        given threshold is ``bucket_id``, sorted by date, newest first.

        :param threshold: a Threshold, or any value ``Threshold()`` accepts.
        :param bucket_id: ID of the bucket to look up.
        :param project: stored on the returned Bucket as-is; it is NOT used
            to filter the search.
        :param from_: optional offset of the first report to return.
        :param size: optional maximum number of reports to return.
        :returns: a Bucket whose ``total`` is the number of matching reports
            and whose ``top_reports`` are the reports of the requested page.
        :raises BucketNotFoundError: if no report belongs to the bucket.
        """
        # Coerce to a Threshold object.
        threshold = Threshold(threshold)

        bucket_field = "buckets." + threshold.to_elasticsearch()
        query = {
            "filter": {
                "term": {bucket_field: bucket_id}
            },
            "sort": {"date": {"order": "desc"}},
        }

        # Send each paging key only when it was actually given: a lone `size`
        # must not be dropped, and a missing `size` must not be sent as null.
        if from_ is not None:
            query["from"] = from_
        if size is not None:
            query["size"] = size

        response = self.es.search(body=query, index='crashes')
        reports_found = response['hits']['total']

        # Since no reports were found, assume the bucket does not exist (at
        # least for this project).
        if reports_found < 1:
            raise BucketNotFoundError(bucket_id)

        reports = get_reports_by_bucket(response, threshold).get(bucket_id)
        # NOTE(review): presumably this trips when `from_` points past the
        # last report (total > 0 but an empty page) -- confirm whether that
        # case deserves its own error.
        assert reports

        return Bucket(id=bucket_id,
                      project=project,
                      threshold=threshold,
                      total=reports_found,
                      top_reports=reports,
                      first_seen=None)
예제 #3
0
    def top_buckets(self, lower_bound, threshold=None, project=None,
                    from_=None, size=None):
        """
        Given a datetime lower_bound (from date), calculates the top buckets
        in the given timeframe for the given threshold (the default
        threshold is used if not given). The results can be tailored for a
        specific project if needed.

        :param lower_bound: datetime; only reports dated strictly after it
            are counted.
        :param threshold: a Threshold, or any value ``Threshold()`` accepts;
            defaults to ``self.default_threshold``.
        :param project: optional project name to restrict the search to; it
            is also stored on every returned Bucket.
        :param from_: optional number of leading buckets to skip.
        :param size: maximum number of buckets to return (default 10).
        :returns: a list of Bucket objects ordered by descending report
            count. ``total`` is the bucket's report count, ``first_seen`` is
            the earliest report date (as a string) and ``top_reports`` is
            None.
        :raises TypeError: if lower_bound is not a datetime.
        :raises RequestError: re-raised from the search after its error has
            been printed to stderr.
        """

        if not isinstance(lower_bound, datetime):
            raise TypeError('The lower bound MUST be a datetime object.')

        # Get the default threshold.
        if threshold is None:
            threshold = self.default_threshold
        if not isinstance(threshold, Threshold):
            threshold = Threshold(threshold)

        # Filters by lower-bound by default;
        filters = [{
            "range": {
                "date": {
                    "gt": lower_bound.isoformat()
                }
            }
        }]

        # May filter optionally by project name.
        if project is not None:
            filters.append({
                "term": {
                    "project": project
                }
            })

        # Kept as a separate dict so the bucket count can be filled in below.
        terms = {
            "field": "buckets." + threshold.to_elasticsearch(),
            "order": {"_count": "desc"},
        }

        # Oh, ElasticSearch! You and your verbose query "syntax"!
        query = {
            # Read this inside out:
            "aggs": {
                "top_buckets_filtered": {
                    # Filter the top buckets by date, and maybe by project.
                    "filter": {
                        "bool": {"must": filters}
                    },
                    # Get the top buckets in descending order of size.
                    "aggs": {
                        "top_buckets": {
                            "terms": terms,
                            # Get the date of the earliest crash per bucket.
                            "aggs": {
                                "first_seen": {
                                    "min": {
                                        "field": "date"
                                    }
                                }
                            }
                        }
                    }
                }
            },

            # Do not send any hits back!
            "size": 0
        }

        if size is None:
            size = 10

        # The query carries no offset of its own, so ask for the skipped
        # buckets as well and slice them off the response afterwards.
        bucket_count = size
        if from_ is not None:
            assert from_ >= 0
            bucket_count = bucket_count + from_
        assert size >= 0
        terms["size"] = bucket_count

        try:
            response = self.es.search(body=query, index='crashes')
        except RequestError as e:
            print(e.error, file=sys.stderr)
            # Bare raise keeps the original traceback intact.
            raise

        # Oh, ElasticSearch! You and your verbose responses!
        top_buckets = (response['aggregations']
                       ['top_buckets_filtered']
                       ['top_buckets']
                       ['buckets'])

        if from_ is not None:
            top_buckets = top_buckets[from_:]

        return [Bucket(id=bucket['key'], project=project, threshold=threshold,
                       total=bucket['doc_count'],
                       first_seen=bucket['first_seen']['value_as_string'],
                       top_reports=None)
                for bucket in top_buckets]
예제 #4
0
    def top_buckets(self,
                    lower_bound,
                    threshold=None,
                    project=None,
                    from_=None,
                    size=None,
                    upper_bound=None,
                    query_string=None):
        """
        Given a datetime lower_bound (from date), calculates the top buckets
        in the given timeframe for the given threshold (the default
        threshold is used if not given). The results can be tailored for a
        specific project if needed.

        :param lower_bound: datetime; only reports dated strictly after it
            are counted.
        :param threshold: a Threshold, or any value ``Threshold()`` accepts;
            defaults to ``self.default_threshold``.
        :param project: optional project name to restrict the search to; it
            is also stored on every returned Bucket.
        :param from_: optional number of leading buckets to skip.
        :param size: maximum number of buckets to return (default 10).
        :param upper_bound: optional object with an ``isoformat()`` method;
            only reports dated strictly before it are counted.
        :param query_string: optional query_string query that reports must
            also match (terms are combined with AND by default).
        :returns: a list of Bucket objects ordered by descending report
            count. ``total`` is the bucket's report count, ``first_seen`` is
            the earliest report date (as a string) and ``top_reports`` is
            None.
        :raises TypeError: if lower_bound is not a datetime.
        :raises RequestError: re-raised from the search after its error has
            been printed to stderr.
        """

        if not isinstance(lower_bound, datetime):
            raise TypeError('The lower bound MUST be a datetime object.')

        # Get the default threshold.
        if threshold is None:
            threshold = self.default_threshold
        if not isinstance(threshold, Threshold):
            threshold = Threshold(threshold)

        # Filters by lower-bound by default; the upper bound, when given,
        # goes into the same range clause.
        date_range = {"gt": lower_bound.isoformat()}
        if upper_bound is not None:
            date_range["lt"] = upper_bound.isoformat()
        filters = [{"range": {"date": date_range}}]

        # May filter optionally by project name.
        if project is not None:
            filters.append({"term": {"project": project}})

        # this doesn't work on ES 2.3!
        if query_string is not None:
            print("Query string!", file=sys.stderr)
            filters.append({
                "query": {
                    "query_string": {
                        "query": query_string,
                        "default_operator": "AND",
                    }
                }
            })

        # Kept as a separate dict so the bucket count can be filled in below.
        terms = {
            "field": "buckets." + threshold.to_elasticsearch(),
            "order": {"_count": "desc"},
        }

        # Oh, ElasticSearch! You and your verbose query "syntax"!
        query = {
            # Read this inside out:
            "aggs": {
                "top_buckets_filtered": {
                    # Filter the top buckets by date, and maybe by project
                    # and query string.
                    "filter": {
                        "bool": {
                            "must": filters
                        }
                    },
                    # Get the top buckets in descending order of size.
                    "aggs": {
                        "top_buckets": {
                            "terms": terms,
                            # Get the date of the earliest crash per bucket.
                            "aggs": {
                                "first_seen": {
                                    "min": {
                                        "field": "date"
                                    }
                                }
                            }
                        }
                    }
                }
            },

            # Do not send any hits back!
            "size": 0
        }

        if size is None:
            size = 10

        # The query carries no offset of its own, so ask for the skipped
        # buckets as well and slice them off the response afterwards.
        bucket_count = size
        if from_ is not None:
            assert from_ >= 0
            bucket_count = bucket_count + from_
        assert size >= 0
        terms["size"] = bucket_count

        try:
            response = self.es.search(body=query, index=self.es_index)
        except RequestError as e:
            print(e.error, file=sys.stderr)
            # Bare raise keeps the original traceback intact.
            raise

        # Oh, ElasticSearch! You and your verbose responses!
        top_buckets = (response['aggregations']['top_buckets_filtered']
                       ['top_buckets']['buckets'])

        if from_ is not None:
            top_buckets = top_buckets[from_:]

        return [
            Bucket(id=bucket['key'],
                   project=project,
                   threshold=threshold,
                   total=bucket['doc_count'],
                   first_seen=bucket['first_seen']['value_as_string'],
                   top_reports=None) for bucket in top_buckets
        ]
예제 #5
0
    def get_bucket(self,
                   threshold,
                   bucket_id,
                   project=None,
                   from_=None,
                   size=None):
        """
        Returns information for the given bucket.

        Searches ``self.es_index`` for the reports whose bucket at the given
        threshold is ``bucket_id``, sorted by date, newest first.

        :param threshold: a Threshold, or any value ``Threshold()`` accepts.
        :param bucket_id: ID of the bucket to look up.
        :param project: stored on the returned Bucket as-is; it is NOT used
            to filter the search.
        :param from_: optional offset of the first report to return.
        :param size: optional maximum number of reports to return.
        :returns: a Bucket whose ``total`` is the number of matching reports
            and whose ``top_reports`` are the reports of the requested page.
        :raises BucketNotFoundError: if no report belongs to the bucket.
        """
        # Coerce to a Threshold object.
        threshold = Threshold(threshold)

        query = {
            "query": {
                "constant_score": {
                    "filter": {
                        "term": {
                            "buckets." + threshold.to_elasticsearch():
                            bucket_id
                        }
                    }
                }
            },
            "sort": {
                "date": {
                    "order": "desc"
                }
            },
        }

        # Paging keys are independent: add each one only when the caller
        # supplied it, so a lone `size` is honoured and a missing `size` is
        # never sent as null.
        for key, value in (("from", from_), ("size", size)):
            if value is not None:
                query[key] = value

        response = self.es.search(body=query, index=self.es_index)

        # Debugging aid: dump the raw response into the working directory.
        # Opened in text mode because json.dumps() returns a str.
        with open('bucket_response', 'w') as debug_file:
            print(json.dumps(response, indent=2), file=debug_file)

        reports_found = response['hits']['total']

        # Since no reports were found, assume the bucket does not exist (at
        # least for this project).
        if reports_found < 1:
            raise BucketNotFoundError(bucket_id)

        reports = get_reports_by_bucket(response, threshold).get(bucket_id)
        # NOTE(review): presumably this trips when `from_` points past the
        # last report (total > 0 but an empty page) -- confirm whether that
        # case deserves its own error.
        assert reports

        return Bucket(id=bucket_id,
                      project=project,
                      threshold=threshold,
                      total=reports_found,
                      top_reports=reports,
                      first_seen=None)