Ejemplo n.º 1
0
def get_total_results(keyword):
    """Return per-category match counts for transactions matching ``keyword``.

    Builds a ``filters`` aggregation with one named filter per entry of
    ``INDEX_ALIASES_TO_AWARD_TYPES`` (a ``terms`` filter on ``type``),
    attaches it under the name ``"types"`` to a keyword-filtered
    ``TransactionSearch``, and executes the search.

    Args:
        keyword: Search keyword; it is passed through
            ``es_minimal_sanitize`` before being placed in the query.

    Returns:
        The ``buckets`` value of the ``"types"`` aggregation, or ``None``
        when ``handle_execute`` returned ``None`` or the response did not
        contain the expected keys (both cases are logged).
    """
    category_filters = {
        category: {"terms": {"type": types}}
        for category, types in INDEX_ALIASES_TO_AWARD_TYPES.items()
    }
    aggs = A("filters", filters=category_filters)
    filter_query = QueryWithFilters.generate_transactions_elasticsearch_query(
        {"keyword_search": [es_minimal_sanitize(keyword)]})
    search = TransactionSearch().filter(filter_query)
    search.aggs.bucket("types", aggs)
    response = search.handle_execute()

    if response is None:
        logger.error("No Response")
        return None

    try:
        return response["aggregations"]["types"]["buckets"]
    except KeyError:
        # logger.exception keeps the traceback, so the missing key is visible
        # in the logs instead of only the generic message.
        logger.exception("Unexpected Response")
        return None
Ejemplo n.º 2
0
def get_sum_and_count_aggregation_results(keyword):
    """Return the count and summed amount of transactions matching ``keyword``.

    Two aggregations are attached to a keyword-filtered
    ``TransactionSearch``: a ``sum`` over ``transaction_amount`` and a
    ``value_count`` over ``transaction_id``.

    Args:
        keyword: Search keyword; sanitized with ``es_minimal_sanitize``.

    Returns:
        A dict with ``prime_awards_count`` and
        ``prime_awards_obligation_amount`` (the latter rounded to 2 decimal
        places), or ``None`` when ``handle_execute`` returned ``None`` or an
        expected key was missing from the response (the latter is logged).
    """
    sanitized_keyword = es_minimal_sanitize(keyword)
    filter_query = QueryWithFilters.generate_transactions_elasticsearch_query(
        {"keyword_search": [sanitized_keyword]})
    search = TransactionSearch().filter(filter_query)

    # (aggregation name, aggregation body) pairs, registered in this order.
    metric_aggs = (
        ("prime_awards_obligation_amount",
         {"sum": {"field": "transaction_amount"}}),
        ("prime_awards_count",
         {"value_count": {"field": "transaction_id"}}),
    )
    for agg_name, agg_body in metric_aggs:
        search.aggs.bucket(agg_name, agg_body)

    response = search.handle_execute()
    if response is None:
        return None

    try:
        aggregations = response["aggregations"]
        award_count = aggregations["prime_awards_count"]["value"]
        obligation_amount = round(
            aggregations["prime_awards_obligation_amount"]["value"], 2)
        return {
            "prime_awards_count": award_count,
            "prime_awards_obligation_amount": obligation_amount,
        }
    except KeyError:
        logger.exception("Unexpected Response")
    return None
Ejemplo n.º 3
0
def get_sum_aggregation_results(keyword, field="transaction_amount"):
    """Return the aggregations of a ``sum`` over ``field`` for ``keyword``.

    A ``sum`` aggregation on ``field`` is attached under the name
    ``"transaction_sum"`` to a keyword-filtered ``TransactionSearch``.

    Args:
        keyword: Search keyword; sanitized with ``es_minimal_sanitize``.
        field: Name of the field to sum. Defaults to
            ``"transaction_amount"``.

    Returns:
        ``response["aggregations"]`` when the executed response is truthy,
        otherwise ``None``.
    """
    sum_agg = A("sum", field=field)
    # NOTE(review): this passes a bare string under "keywords", whereas other
    # query builders nearby pass a list under "keyword_search" - confirm which
    # shape generate_transactions_elasticsearch_query expects here.
    filter_query = QueryWithFilters.generate_transactions_elasticsearch_query(
        {"keywords": es_minimal_sanitize(keyword)})
    search = TransactionSearch().filter(filter_query)
    search.aggs.bucket("transaction_sum", sum_agg)
    response = search.handle_execute()

    # NOTE(review): this is a truthiness test, not an ``is None`` test; a
    # falsy-but-present response also yields None - confirm that is intended.
    if not response:
        return None
    return response["aggregations"]
Ejemplo n.º 4
0
def get_download_ids(keyword, field, size=10000):
    """
    Return a generator that yields lists of ``field`` values for the
    transactions matching ``keyword``, one list per aggregation partition.

    The number of partitions is derived from the total match count reported
    by ``get_total_results`` and is capped at ``DOWNLOAD_QUERY_SIZE // size``.
    Each partition is fetched with a ``terms`` aggregation on ``field`` that
    requests up to ``size`` buckets, and the bucket keys are yielded as a
    list.

    Note: this only works for fields in ES of integer type.

    Args:
        keyword: Search keyword; sanitized with ``es_minimal_sanitize``.
        field: Name of the field whose distinct values are collected.
        size: Bucket count requested per partition (also used as
            ``shard_size``).

    Yields:
        A list of bucket keys for each partition.

    Raises:
        Exception: If a partition query returns ``None`` from
            ``handle_execute``.
    """
    max_iter = DOWNLOAD_QUERY_SIZE // size

    totals = get_total_results(keyword)
    if totals is None:
        logger.error(
            "Error retrieving total results. Max number of attempts reached")
        return
    total = sum(totals[category]["doc_count"]
                for category in INDEX_ALIASES_TO_AWARD_TYPES)
    required_iter = (total // size) + 1
    n_iter = min(max(1, required_iter), max_iter)

    # The keyword filter is the same for every partition, so sanitize the
    # keyword and build the query once instead of once per iteration.
    filter_query = QueryWithFilters.generate_transactions_elasticsearch_query(
        {"keyword_search": [es_minimal_sanitize(keyword)]})

    for partition in range(n_iter):
        search = TransactionSearch().filter(filter_query)
        aggs = A(
            "terms",
            field=field,
            include={"partition": partition, "num_partitions": n_iter},
            size=size,
            shard_size=size,
        )
        search.aggs.bucket("results", aggs)
        response = search.handle_execute()
        if response is None:
            raise Exception("Breaking generator, unable to reach cluster")
        yield [
            bucket["key"]
            for bucket in response["aggregations"]["results"]["buckets"]
        ]
    def post(self, request):
        """Validate a paginated transaction search request and run it.

        The request body is validated with ``TinyShield`` against a
        ``fields`` model plus copies of ``AWARD_FILTER`` and ``PAGINATION``,
        with ``keywords``, ``award_type_codes`` and ``sort`` forced to be
        required. The keywords are sanitized and moved to the
        ``keyword_search`` filter before the Elasticsearch query is built.

        Returns:
            A ``Response`` wrapping ``build_elasticsearch_result`` for the
            executed search, or an empty-results ``Response`` when the
            award type codes contain ``"no intersection"``.

        Raises:
            UnprocessableEntityException: If the requested page starts at or
                beyond ``settings.ES_TRANSACTIONS_MAX_RESULT_WINDOW``.
            InvalidParameterException: If the sort value is not one of the
                requested fields.
        """
        fields_model = {
            "name": "fields",
            "key": "fields",
            "type": "array",
            "array_type": "text",
            "text_type": "search",
            "optional": False,
        }
        models = [fields_model]
        # Deep copies so the shared model definitions are not mutated below.
        models += copy.deepcopy(AWARD_FILTER)
        models += copy.deepcopy(PAGINATION)

        required_names = ("keywords", "award_type_codes", "sort")
        for model in models:
            if model["name"] in required_names:
                model["optional"] = False

        payload = TinyShield(models).block(request.data)
        page = payload["page"]
        limit = payload["limit"]

        # Index of the first record on the requested page.
        offset = (page - 1) * limit
        if offset >= settings.ES_TRANSACTIONS_MAX_RESULT_WINDOW:
            message = (
                "Page #{page} of size {limit} is over the maximum result limit ({es_limit}). Consider using custom data downloads to obtain large data sets."
            ).format(
                page=page,
                limit=limit,
                es_limit=settings.ES_TRANSACTIONS_MAX_RESULT_WINDOW,
            )
            raise UnprocessableEntityException(message)

        sort_field = payload["sort"]
        if sort_field not in payload["fields"]:
            raise InvalidParameterException(
                "Sort value not found in fields: {}".format(sort_field))

        has_filters = "filters" in payload
        if has_filters and "no intersection" in payload["filters"]["award_type_codes"]:
            # "Special case": there will never be results when the website provides this value
            empty_page_metadata = {
                "page": page,
                "next": None,
                "previous": None,
                "hasNext": False,
                "hasPrevious": False,
            }
            return Response({
                "limit": limit,
                "results": [],
                "page_metadata": empty_page_metadata,
            })

        sorts = {TRANSACTIONS_LOOKUP[sort_field]: payload["order"]}

        # The slice ends one record past the page; presumably this lets
        # build_elasticsearch_result tell whether a next page exists (confirm).
        slice_end = page * limit + 1

        # Replace the raw "keywords" filter with its sanitized counterpart.
        filters = payload["filters"]
        filters["keyword_search"] = [
            es_minimal_sanitize(raw_keyword)
            for raw_keyword in filters["keywords"]
        ]
        filters.pop("keywords")

        filter_query = QueryWithFilters.generate_transactions_elasticsearch_query(
            filters)
        base_search = TransactionSearch().filter(filter_query)
        search = base_search.sort(sorts)[offset:slice_end]
        response = search.handle_execute()
        result = self.build_elasticsearch_result(payload, response)
        return Response(result)