Example #1
    def __get_query_agg_percentiles(cls, field, agg_id=None):
        """
        Create an es_dsl aggregation object for getting the percentiles value of a field.
        In general this is used to get the median (0.5) percentile.

        :param field: field from which to get the percentile values
        :param agg_id: id to use for the aggregation; defaults to cls.AGGREGATION_ID
        :return: a tuple with the aggregation id and es_dsl aggregation object. Ex:
                {
                    "percentiles": {
                        "field": <field>
                    }
                }
        """
        if not agg_id:
            agg_id = cls.AGGREGATION_ID
        query_agg = A("percentiles", field=field)
        return (agg_id, query_agg)
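The tuple is returned rather than attached, so the caller wires it into a search itself. A minimal sketch of that step, assuming elasticsearch_dsl, a default Elasticsearch connection, and an invented index and field name; the median is read from the "50.0" key of the percentiles result:

from elasticsearch_dsl import Search, A

# Sketch only: the index, field and "1" aggregation id are placeholders.
agg_id, query_agg = "1", A("percentiles", field="time_to_close_days")
search = Search(index="issues").extra(size=0)      # aggregations only, no hits
search.aggs.bucket(agg_id, query_agg)

response = search.execute()
median = response.aggregations[agg_id]["values"]["50.0"]   # the 0.5 percentile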
Example #2
 def _get_aggregation(self, **extra):
     params = {
         'path': self.nestedfield,
         'aggs': {
             'val': {
                 'terms': {
                     'field': self.field,
                     'size': 40,
                     'min_doc_count': 1
                 }
             }
         }
     }
     params.update(self.kwargs)
     params.update(extra)
     return A('nested', **params)
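Because the whole params dict, including the raw 'aggs' mapping, is passed straight to A('nested', ...), the resulting aggregation mirrors the raw Elasticsearch body. A standalone sketch with placeholder path and field names (not taken from the original class), which can be checked without a cluster via to_dict():

from elasticsearch_dsl import A

# Placeholder path/field names; only the structure matters here.
agg = A('nested', path='comments', aggs={
    'val': {
        'terms': {'field': 'comments.author', 'size': 40, 'min_doc_count': 1}
    }
})
print(agg.to_dict())
# roughly: {'nested': {'path': 'comments'},
#           'aggs': {'val': {'terms': {'field': 'comments.author',
#                                      'size': 40, 'min_doc_count': 1}}}}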
Example #3
    def get_cardinality(self, field=None):
        """
        Create a cardinality aggregation object and add it to the aggregation dict

        :param field: the field present in the index that is to be aggregated
        :returns: self, which allows the method to be chainable with the other methods
        """

        if not field:
            raise AttributeError(
                "Please provide field to apply aggregation to!")
        agg = A("cardinality",
                field=field,
                precision_threshold=self.precision_threshold)
        self.aggregations['cardinality_' + field] = agg
        return self
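get_cardinality stores the aggregation under a derived key and returns self so calls can be chained; the stored aggregations still have to be copied onto a search by the caller. A rough sketch of that final step, with an invented field name and precision threshold:

from elasticsearch_dsl import Search, A

# Stand-in for self.aggregations after a call like get_cardinality("author_uuid").
aggregations = {
    'cardinality_author_uuid': A("cardinality",
                                 field="author_uuid",
                                 precision_threshold=3000)
}
search = Search(index="git").extra(size=0)
for name, agg in aggregations.items():
    search.aggs.bucket(name, agg)    # attach each stored aggregation under its key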
Example #4
File: api.py Project: lauren-d/rero-ils
 def organisation_pids(self):
     """Get organisations pids."""
     organisations = set()
     search = DocumentsSearch().filter('term',
                                       contribution__agent__pid=self.pid)
     size = current_app.config.get('RERO_ILS_AGGREGATION_SIZE').get(
         'organisations')
     agg = A('terms',
             field='holdings.organisation.organisation_pid',
             size=size)
     search.aggs.bucket('organisation', agg)
     results = search.execute()
     for result in results.aggregations.organisation.buckets:
         if result.doc_count:
             organisations.add(result.key)
     return list(organisations)
Example #5
    def infoFirewallActions(self):
        s = Search(index='ossim-osdepym*')
        s = s.query('match_all')
        s = s.filter('range',
                     log_date={
                         "gte": 1554087600000,
                         "lte": 1556679599999
                     })
        s.aggs.bucket(
            'actions',
            A('terms',
              field='action.keyword',
              size=10,
              order={"_count": "desc"}))

        return s.execute().aggregations.actions.buckets
Example #6
    def __get_query_agg_cardinality(cls, field, agg_id=None):
        """
        Create an es_dsl aggregation object for getting the approximate count of distinct values of a field.

        :param field: field from which to get the count of distinct values
        :param agg_id: id to use for the aggregation; defaults to cls.AGGREGATION_ID
        :return: a tuple with the aggregation id and es_dsl aggregation object. Ex:
                {
                    "cardinality": {
                        "field": <field>,
                        "precision_threshold": 3000
                    }
                }
        """
        if not agg_id:
            agg_id = cls.AGGREGATION_ID
        query_agg = A("cardinality", field=field, precision_threshold=cls.ES_PRECISION)
        return (agg_id, query_agg)
Example #7
    def test_get_extended_stats(self):
        """
        Test the extended statistics aggregation
        """

        field = self.field1
        # without field param
        with self.assertRaises(AttributeError):
            self.Query_test_object.get_extended_stats()

        # with field param
        self.Query_test_object.get_extended_stats(field)
        test_agg = A("extended_stats", field=field)
        agg_name, agg = self.Query_test_object.aggregations.popitem()
        self.assertEqual('extended_stats_' + field, agg_name)
        self.assertEqual(agg, test_agg)
Example #8
            def get_loans_by_item_pids(item_pids):
                """Get loans for the given item pid list."""
                states = \
                    current_app.config['CIRCULATION_STATES_LOAN_ACTIVE']
                loan_search = LoansSearch() \
                    .filter('terms', state=states) \
                    .filter('terms', item_pid__value=item_pids) \
                    .source(['pid', 'item_pid.value', 'start_date',
                            'end_date', 'state', '_created'])
                agg = A('terms', field='item_pid.value', size=chunk_size)
                loan_search.aggs.bucket('loans_count', agg)

                loan_search = loan_search.extra(
                    collapse={
                        'field': 'item_pid.value',
                        "inner_hits": {
                            "name": "most_recent",
                            "size": 1,
                            "sort": [{
                                "_created": "desc"
                            }],
                        }
                    })
                # The default result size for the execute method is 10,
                # so we need to raise it to the chunk size.
                results = loan_search[0:chunk_size].execute()
                agg_buckets = {}
                for result in results.aggregations.loans_count.buckets:
                    agg_buckets[result.key] = result.doc_count
                loans = {}
                for loan_hit in results:
                    # get most recent loans
                    loan_data = loan_hit.meta.inner_hits.most_recent[0]\
                        .to_dict()
                    item_pid = loan_data['item_pid']['value']
                    loans[item_pid] = {
                        'loans_count':
                        agg_buckets.get(item_pid, 0),
                        'last_transaction_date':
                        ciso8601.parse_datetime(loan_data['_created']).date()
                    }
                    if loan_data.get('state') == LoanState.ITEM_ON_LOAN:
                        loans[item_pid]['checkout_date'] = ciso8601.\
                            parse_datetime(loan_data['start_date']).date()
                        loans[item_pid]['due_date'] = ciso8601.\
                            parse_datetime(loan_data['end_date']).date()
                return loans
Example #9
    def infoFlowLogs(self):
        s = Search(index='ossim-osdepym*')
        s = s.query('match_all')
        s = s.filter('range',
                     log_date={
                         "gte": 1554087600000,
                         "lte": 1556679599999
                     })
        s.aggs.bucket(
            'users',
            A('date_histogram',
              field='log_date',
              interval="30m",
              time_zone="America/Argentina/Buenos_Aires",
              min_doc_count=1))

        return s.execute().aggregations.users.buckets
Example #10
    def BuildRootTree(self):
        s = Search()
        t = Q('has_parent',
              type='hostname',
              query=Q('query_string', query="*"))
        aggs = A('terms', field='AuditType.Generator', size=16)

        s.aggs.bucket('datatypes', aggs)
        query = s.query(t)

        try:
            r = requests.post(self.es_host + ":" + self.es_port + self.index +
                              self.type_audit_type + '/_search',
                              data=json.dumps(query.to_dict()),
                              auth=(self.elastic_user, self.elastic_pass),
                              verify=False)
        except ConnectionError as e:
            ret = {"connection_error": e.args[0]}
            return ret

        data = [{
            "id": "stackable",
            "parent": "#",
            "text": "Stackable Data",
            "type": "root"
        }]

        i = [
            'w32services', 'w32tasks', 'w32scripting-persistence',
            'w32prefetch', 'w32network-dns', 'urlhistory',
            'filedownloadhistory'
        ]

        for x in r.json()['aggregations']['datatypes']['buckets']:
            if x['key'] in i:
                data.append({
                    "id": x['key'],
                    "parent": "stackable",
                    "text": x['key'],
                    "children": True,
                    "type": "stack"
                })

        return data
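BuildRootTree serializes the DSL query and POSTs it with requests; the same aggregation can also be executed through elasticsearch_dsl itself. A hedged sketch of that alternative, where the client URL and index name are placeholders rather than the original object's attributes:

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, Q, A

client = Elasticsearch("http://localhost:9200")    # placeholder connection details
s = Search(using=client, index="audit-index")      # placeholder index name
s = s.query(Q('has_parent', type='hostname', query=Q('query_string', query="*")))
s.aggs.bucket('datatypes', A('terms', field='AuditType.Generator', size=16))

response = s.execute()
for bucket in response.aggregations.datatypes.buckets:
    print(bucket.key, bucket.doc_count)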
Example #11
def test_nb_sales_by_product_type_filter_product_type_1(elasticsearch_sale):
    # Number of sales, by product type, for product_type_1
    a = A('filter', term={'products.product_type': 'product_type_1'})
    a.bucket(
        'reverse_nested_root',
        'reverse_nested',
    )
    search = get_search()
    search.aggs.bucket(
        'products',
        'nested',
        path='products',
    ).bucket(
        'product_type_1',
        a,
    )
    write_output(search, 'nb_sales_by_product_type_filter_product_type_1')
Example #12
    def extend_elasticsearch_search_with_sub_aggregation(self, search: AccountSearch):
        """
        This template method is called if the `self.sub_agg_key` is supplied, in order to post-process the query and
        inject a sub-aggregation on a secondary dimension (that is subordinate to the first agg_key's dimension).

        Example: Subtier Agency spending rolled up to Toptier Agency spending
        """
        sub_bucket_count = 1000  # get_number_of_unique_terms_for_accounts(self.filter_query, f"{self.sub_agg_key}")
        size = sub_bucket_count
        shard_size = sub_bucket_count + 100

        if shard_size > 10000:
            raise ForbiddenException(
                "Current filters return too many unique items. Narrow filters to return results or use downloads."
            )

        # Sub-aggregation to append to primary agg
        sub_group_by_sub_agg_key_values = {"field": self.sub_agg_key, "size": size, "shard_size": shard_size}

        sub_group_by_sub_agg_key = A("terms", **sub_group_by_sub_agg_key_values)
        sub_dim_metadata = A(
            "top_hits",
            size=1,
            sort=[{"financial_accounts_by_award.update_date": {"order": "desc"}}],
            _source={"includes": self.sub_top_hits_fields},
        )
        sub_sum_covid_outlay = A(
            "sum",
            field="financial_accounts_by_award.gross_outlay_amount_by_award_cpe",
            script={"source": "doc['financial_accounts_by_award.is_final_balances_for_fy'].value ? _value : 0"},
        )
        sub_sum_covid_obligation = A("sum", field="financial_accounts_by_award.transaction_obligated_amount")
        sub_count_awards_by_dim = A("reverse_nested", **{})
        sub_award_count = A("value_count", field="financial_account_distinct_award_key")
        loan_value = A("sum", field="total_loan_value")

        sub_group_by_sub_agg_key.metric("dim_metadata", sub_dim_metadata).metric(
            "sum_transaction_obligated_amount", sub_sum_covid_obligation
        ).metric("sum_gross_outlay_amount_by_award_cpe", sub_sum_covid_outlay).bucket(
            "count_awards_by_dim", sub_count_awards_by_dim
        ).metric(
            "award_count", sub_award_count
        ).metric(
            "sum_loan_value", loan_value
        )

        # Append sub-agg to primary agg, and include the sub-agg's sum metric aggs too
        search.aggs[self.agg_group_name]["group_by_dim_agg"].bucket(self.sub_agg_group_name, sub_group_by_sub_agg_key)
Example #13
def test_scan_aggs_with_multiple_aggs(data_client):
    s = Search(index='flat-git')
    key_aggs = [
        {
            'files': A('terms', field='files')
        },
        {
            'months': {
                'date_histogram': {
                    'field': 'committed_date',
                    'interval': 'month'
                }
            }
        },
    ]
    file_list = list(scan_aggs(s, key_aggs))

    assert len(file_list) == 47
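scan_aggs here is a helper (shipped with the elasticsearch-dsl examples) that pages through every bucket combination using a composite aggregation. A minimal sketch of that underlying mechanism for a single terms source; the paging logic is written out directly and the exact scan_aggs signature is not reproduced:

from elasticsearch_dsl import Search, A

def iter_file_buckets(size=100):
    # Page through all 'files' buckets with a composite aggregation.
    after = None
    while True:
        s = Search(index='flat-git')[:0]                    # no hits, aggs only
        kwargs = {'after': after} if after else {}
        s.aggs.bucket('comp', 'composite',
                      sources=[{'files': A('terms', field='files')}],
                      size=size, **kwargs)
        response = s.execute()
        buckets = response.aggregations.comp.buckets
        if not buckets:
            return
        for bucket in buckets:
            yield bucket
        if 'after_key' not in response.aggregations.comp:
            return
        after = response.aggregations.comp.after_key        # resume point for next page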
Example #14
def get_loans_count_by_library_for_patron_pid(patron_pid, filter_states=None):
    """Get loans count for patron and aggregate result on library_pid.

    :param patron_pid: The patron pid
    :param filter_states: loans type to filters
    :return: a dict with library_pid as key, number of loans as value
    """
    filter_states = filter_states or []  # prevent mutable argument warning
    agg = A('terms', field='library_pid')
    search = search_by_patron_item_or_document(patron_pid=patron_pid,
                                               filter_states=filter_states)
    search.aggs.bucket('library', agg)
    search = search[0:0]
    results = search.execute()
    stats = {}
    for result in results.aggregations.library.buckets:
        stats[result.key] = result.doc_count
    return stats
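The search = search[0:0] line asks for zero hits so only the aggregation buckets come back; slicing a Search just sets from/size on the request body. A tiny sketch of the effect (the index name is a placeholder, the field is reused from the example above):

from elasticsearch_dsl import Search, A

search = Search(index='loans')                       # placeholder index name
search.aggs.bucket('library', A('terms', field='library_pid'))
search = search[0:0]                                 # zero hits, aggregations only
print(search.to_dict())
# roughly: {'aggs': {'library': {'terms': {'field': 'library_pid'}}},
#           'from': 0, 'size': 0}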
Example #15
    def get_by_id(self, discussion_id, min_il=0, max_il=100):
        """Return a single discussion by discussion_id"""

        search = self._prepare_search() \
            .filter("match", discussion_id=discussion_id)
        search.aggs.bucket('discussions', A('terms', field='discussion_id')) \
            .bucket("unread", "filter", term={"is_unread": True})
        result = search.execute()
        if not result.hits or len(result.hits) < 1:
            return None

        message = self.get_last_message(discussion_id, min_il, max_il, True)
        discussion = DiscussionIndex(discussion_id)
        discussion.total_count = result.hits.total
        discussion.last_message = message
        discussion.unread_count = result.aggregations.discussions.buckets[
            0].unread.doc_count
        return discussion
Example #16
    def get_percentiles(self, field=None, percents=None):
        """
        Create a percentile aggregation object and add it to the aggregation dict

        :param field: the field present in the index that is to be aggregated
        :param percents: the specific percentiles to be calculated
                         default: [1.0, 5.0, 25.0, 50.0, 75.0, 95.0, 99.0]
        :returns: self, which allows the method to be chainable with the other methods
        """

        if not field:
            raise AttributeError("Please provide field to apply aggregation to!")
        if not percents:
            percents = [1.0, 5.0, 25.0, 50.0, 75.0, 95.0, 99.0]
        agg = A("percentiles", field=field, percents=percents)

        self.aggregations['percentiles_' + field] = agg
        return self
Example #17
def test_scan_aggs_with_multiple_aggs(data_client):
    s = Search(index="flat-git")
    key_aggs = [
        {
            "files": A("terms", field="files")
        },
        {
            "months": {
                "date_histogram": {
                    "field": "committed_date",
                    "interval": "month"
                }
            }
        },
    ]
    file_list = list(scan_aggs(s, key_aggs))

    assert len(file_list) == 47
Example #18
    def by_period(self,
                  field=None,
                  period=None,
                  timezone=None,
                  start=None,
                  end=None):
        """
        Create a date histogram aggregation using the last added aggregation for the
        current object. Add this date_histogram aggregation into self.aggregations

        :param field: the index field to create the histogram from
        :param period: the interval which elasticsearch supports, ex: "month", "week" and such
        :param timezone: custom timezone
        :param start: custom start date for the date histogram, default: start date under range
        :param end: custom end date for the date histogram, default: end date under range
        :returns: self, which allows the method to be chainable with the other methods
        """

        hist_period = period if period else self.interval_
        time_zone = timezone if timezone else "UTC"

        start_ = start if start else self.start_date
        end_ = end if end else self.end_date
        bounds = self.get_bounds(start_, end_)

        date_field = field if field else "grimoire_creation_date"
        agg_key = "date_histogram_" + date_field
        if agg_key in self.aggregations.keys():
            agg = self.aggregations[agg_key]
        else:
            agg = A("date_histogram",
                    field=date_field,
                    interval=hist_period,
                    time_zone=time_zone,
                    min_doc_count=0,
                    **bounds)

        child_agg_counter = self.child_agg_counter_dict[agg_key]
        child_name, child_agg = self.aggregations.popitem()

        agg.metric(child_agg_counter, child_agg)
        self.aggregations[agg_key] = agg
        self.child_agg_counter_dict[agg_key] += 1
        return self
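get_bounds is not shown here; in this query builder it presumably returns extra keyword arguments such as an extended_bounds mapping that pins the histogram to the requested date range. A hedged, standalone sketch of a date_histogram assembled the same way, with invented dates and fields and the assumed shape of those bounds:

from elasticsearch_dsl import A

# Assumption: get_bounds() returns something like this extended_bounds mapping.
bounds = {"extended_bounds": {"min": "2023-01-01", "max": "2023-12-31"}}

agg = A("date_histogram",
        field="grimoire_creation_date",
        interval="month",
        time_zone="UTC",
        min_doc_count=0,
        **bounds)
agg.metric(0, "cardinality", field="author_uuid", precision_threshold=3000)
print(agg.to_dict())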
Example #19
    def _create_aggregate(self, catalog: CatalogName, filters: MutableFilters, facet_config, agg):
        """
        Creates the aggregation to be used in ElasticSearch

        :param catalog: The name of the catalog to create the aggregations for

        :param filters: Translated filters from 'files/' endpoint call

        :param facet_config: Configuration for the facets (i.e. facets on which
               to construct the aggregate) in '{browser:es_key}' form

        :param agg: Current aggregate where this aggregation is occurring.
                    Syntax in browser form

        :return: returns an Aggregate object to be used in a Search query
        """
        # Pop filter of current Aggregate
        excluded_filter = filters.pop(facet_config[agg], None)
        # Create the appropriate filters
        filter_query = self._create_query(catalog, filters)
        # Create the filter aggregate
        aggregate = A('filter', filter_query)
        # Make an inner aggregate that will contain the terms in question
        _field = f'{facet_config[agg]}.keyword'
        service_config = self.service_config(catalog)
        if agg == 'project':
            _sub_field = service_config.translation['projectId'] + '.keyword'
            aggregate.bucket('myTerms', 'terms', field=_field, size=config.terms_aggregation_size).bucket(
                'myProjectIds', 'terms', field=_sub_field, size=config.terms_aggregation_size)
        else:
            aggregate.bucket('myTerms', 'terms', field=_field, size=config.terms_aggregation_size)
        aggregate.bucket('untagged', 'missing', field=_field)
        if agg == 'fileFormat':
            # FIXME: Use of shadow field is brittle
            #        https://github.com/DataBiosphere/azul/issues/2289
            file_size_field = service_config.translation['fileSize'] + '_'
            aggregate.aggs['myTerms'].metric('size_by_type', 'sum', field=file_size_field)
            aggregate.aggs['untagged'].metric('size_by_type', 'sum', field=file_size_field)
        # If the aggregate in question didn't have any filter on the API call,
        # skip it. Otherwise, insert the popped value back in.
        if excluded_filter is not None:
            filters[facet_config[agg]] = excluded_filter
        return aggregate
Example #20
def clusters():
    """
    Render the AJAX version of the clustering page. Intended to replace
    display_clusters() once it has been tested.
    """
    person = request.values.get('filter')
    print(person)
    Face._index.refresh()

    total = Face.search().count()
    named = Face.search().filter("exists", field="person").count()
    status = "{:.1%} ({} out of {}) faces are named. Clusters count: {}".format(
        named / total, named, total,
        Cluster.search().count())

    a = A("terms", field="person.raw", size=10000)
    ps = Search()
    ps.aggs.bucket("persons", a)
    psr = ps.execute()

    persons = [b.key for b in psr.aggs['persons']]

    if person:
        s = Cluster.search().filter("prefix",
                                    person=person).sort("-face_count")
        results = s[0:10000].execute()
    else:
        s = Cluster.search().exclude("exists", field="person")
        s.query = FunctionScore(query=s.query,
                                functions=[
                                    SF('random_score', weight=100),
                                    SF('field_value_factor',
                                       field="face_count",
                                       weight=1)
                                ],
                                score_mode="avg",
                                boost_mode="replace")
        results = s[0:50].execute()

    return render_template('clusters.html',
                           clusters=results,
                           persons=persons,
                           status=status)
Example #21
    def _get_search_instance(self, **kwargs):
        '''
		Instantiate and return an Elasticsearch search instance. Set the 
		maximum number of aggregation buckets to 15.

		In practical terms, bucket size is the number of siblings at a given 
		taxonomy level. As of 7/17/17, we have seven siblings on level_0 and 
		an average of nine on level_1. Electronics currently has 14 children,
		i.e. buckets, so we'll set our bucket size to 15. 
		'''
        # set the max number of buckets to retrieve
        agg_level = kwargs.get('agg_level')
        BUCKET_SIZE = 15
        s = Search(index='article')
        agg_on_field = agg_level + '.raw'
        cat_lv_dict = {'field': agg_on_field}
        a = A('terms', size=BUCKET_SIZE, **cat_lv_dict)
        s.aggs.bucket('category', a)
        return s, BUCKET_SIZE
Example #22
 def test_get_aggregation(self):
     expected = A({
         'nested': {
             'path': 'some_field'
         },
         'aggs': {
             'min_start': {
                 'min': {
                     'field': 'some_field.start'
                 }
             },
             'max_end': {
                 'max': {
                     'field': 'some_field.end'
                 }
             }
         }
     })
     assert self.facet.get_aggregation() == expected
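As the test above relies on, A() accepts a raw aggregation dict as well as the name-plus-keyword form, and two aggregations compare equal when they serialize to the same body. A small sketch with a made-up field:

from elasticsearch_dsl import A

from_dict = A({'terms': {'field': 'tags', 'size': 10}})
from_kwargs = A('terms', field='tags', size=10)

assert from_dict == from_kwargs                      # equality is based on to_dict()
assert from_dict.to_dict() == {'terms': {'field': 'tags', 'size': 10}}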
Example #23
 def get_queryset(self):
     """Adds aggregations and sets an empty size to return only facets."""
     creator = A("nested", path="creators")
     creator_name = A("terms", field="creators.title.keyword", size=100)
     subject = A("nested", path="terms")
     subject_name = A("terms", field="terms.title.keyword", size=100)
     format = A("terms", field="formats.keyword")
     max_date = A("max", field="dates.end", format="epoch_millis")
     min_date = A("min", field="dates.begin", format="epoch_millis")
     online = A('filter', Q('terms', online=[True]))
     self.search.aggs.bucket('creator',
                             creator).bucket("name", creator_name)
     self.search.aggs.bucket('subject',
                             subject).bucket("name", subject_name)
     self.search.aggs.bucket('format', format)
     self.search.aggs.bucket("max_date", max_date)
     self.search.aggs.bucket("min_date", min_date)
     self.search.aggs.bucket("online", online)
     return (self.search.extra(size=0).query(self.get_structured_query()) if
             self.request.GET.get(settings.REST_FRAMEWORK["SEARCH_PARAM"])
             else self.search.extra(size=0))
Example #24
    def _get_download_ids_generator(cls, search: Union[AwardSearch,
                                                       TransactionSearch],
                                    size: int):
        """
        Takes an AwardSearch or TransactionSearch object (that specifies the index, filter, and source) and returns
        a generator that yields list of IDs in chunksize SIZE.
        """
        max_retries = 10
        total = search.handle_count(retries=max_retries)
        if total is None:
            logger.error(
                "Error retrieving total results. Max number of attempts reached."
            )
            return
        max_iterations = settings.MAX_DOWNLOAD_LIMIT // size
        req_iterations = (total // size) + 1
        num_iterations = min(max(1, req_iterations), max_iterations)

        # Setting the shard_size below works in this case because we are aggregating on a unique field. Otherwise, this
        # would not work due to the number of records. Other places this is set are in the different spending_by
        # endpoints which are either routed or contain less than 10k unique values, both allowing for the shard
        # size to be manually set to 10k.
        for iteration in range(num_iterations):
            aggregation = A(
                "terms",
                field=cls._source_field,
                include={
                    "partition": iteration,
                    "num_partitions": num_iterations
                },
                size=size,
                shard_size=size,
            )
            search.aggs.bucket("results", aggregation)
            response = search.handle_execute(retries=max_retries).to_dict()

            if response is None:
                raise Exception("Breaking generator, unable to reach cluster")
            results = []
            for bucket in response["aggregations"]["results"]["buckets"]:
                results.append(bucket["key"])

            yield results
Example #25
def get_total_results(keyword):
    group_by_agg_key_values = {
        "filters": {category: {"terms": {"type": types}} for category, types in INDEX_ALIASES_TO_AWARD_TYPES.items()}
    }
    aggs = A("filters", **group_by_agg_key_values)
    filter_query = QueryWithFilters.generate_transactions_elasticsearch_query(
        {"keyword_search": [es_minimal_sanitize(keyword)]}
    )
    search = TransactionSearch().filter(filter_query)
    search.aggs.bucket("types", aggs)
    response = search.handle_execute()

    if response is not None:
        try:
            return response["aggregations"]["types"]["buckets"]
        except KeyError:
            logger.error("Unexpected Response")
    else:
        logger.error("No Response")
        return None
Example #26
    def get_daily_volume(self, from_date, to_date):
        s = Search(using='operations', index="deex-*")
        s = s.extra(size=0)
        s = s.query('bool', filter = [
            Q('term', operation_type=4),
            Q('range', block_data__block_time={'gte': from_date, 'lte': to_date}),
            Q('term', operation_history__op_object__fill_price__quote__asset_id__keyword=config.CORE_ASSET_ID)
        ])

        a = A('date_histogram', field='block_data.block_time', interval='1d', format='yyyy-MM-dd') \
                .metric('volume', 'sum', field='operation_history.op_object.fill_price.quote.amount')
        s.aggs.bucket('volume_over_time', a)

        response = s.execute()

        daily_volumes = []
        for daily_volume in response.aggregations.volume_over_time.buckets:
            daily_volumes.append({ 'date': daily_volume.key_as_string, 'volume': daily_volume.volume.value })
        
        return daily_volumes
Example #27
    def get(self):
        """
        Answer to GET requests.

        Used for `visualize` tab instance search typeahead.

        Write a list of all instance names in Elasticsearch.
        """
        s = Result.search()
        a = A("terms", field="instance_name",
              size=0)  # set size to 0 so all results are returned
        s.aggs.bucket("unique_instances", a)
        s = s.params(search_type="count")
        res = s.execute()

        names = [
            x["key"] for x in res.aggregations["unique_instances"]["buckets"]
        ]

        return self.write(json.dumps(names))
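Both size=0 on a terms aggregation (meaning "return all buckets") and search_type="count" were accepted by Elasticsearch 1.x/2.x but rejected by later versions. A hedged sketch of one way to express the same intent against a newer cluster, using an explicit bucket size and a zero-hit search; the index name and the 10000 limit are placeholders:

from elasticsearch_dsl import Search, A

s = Search(index='results').extra(size=0)            # replaces search_type="count"
s.aggs.bucket('unique_instances',
              A('terms', field='instance_name', size=10000))   # explicit upper bound

response = s.execute()
names = [bucket.key for bucket in response.aggregations.unique_instances.buckets]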
Example #28
    def applyAggregations(self):
        rootAgg = self.query.aggs.bucket('editions',
                                         A('nested', path='editions'))

        lastAgg = rootAgg
        for i, agg in enumerate(self.appliedAggregations):
            currentAgg = 'edition_filter_{}'.format(i)
            lastAgg = lastAgg.bucket(currentAgg, agg)

        lastAgg.bucket('lang_parent', 'nested', path='editions.languages')\
            .bucket(
                'languages', 'terms',
                **{'field': 'editions.languages.language', 'size': 200}
            )\
            .bucket('editions_per', 'reverse_nested')

        lastAgg.bucket(
            'formats', 'terms',
            **{'field': 'editions.formats', 'size': 10}
        )\
            .bucket('editions_per', 'reverse_nested')
Example #29
    def test_by_period_without_args(self):
        """
        Test the date histogram aggregation with no parameters
        """

        test_agg = A("date_histogram",
                     field=self.date_field1,
                     interval=self.interval,
                     time_zone=self.timezone,
                     min_doc_count=0,
                     **{})
        test_agg.metric(0,
                        "cardinality",
                        field=self.field1,
                        precision_threshold=self.precision_threshold)

        self.Query_test_object.get_cardinality(self.field1)\
                              .by_period()
        agg_name, agg = self.Query_test_object.aggregations.popitem()

        self.assertEqual(agg, test_agg, msg='\n{0}\n{1}'.format(agg, test_agg))
Example #30
    def test_by_organizations(self):
        """
        Test nested aggregation wrt author organizations
        Just checking if the aggregation exists in the dict, for now
        Because there is no org field in 'git' data source
        """

        test_agg = A("terms",
                     field="author_domain",
                     missing="others",
                     size=self.size)
        test_agg.metric(0,
                        "cardinality",
                        field=self.field1,
                        precision_threshold=self.precision_threshold)

        self.Query_test_object.get_cardinality(self.field1)\
                              .by_organizations("author_domain")
        agg_name, agg = self.Query_test_object.aggregations.popitem()

        self.assertEqual(agg, test_agg, msg='\n{0}\n{1}'.format(agg, test_agg))