def test_filtering_subtier_with_bogus_toptier_es(
    client, monkeypatch, elasticsearch_transaction_index, basic_award, subagency_award
):
    logging_statements = []
    setup_elasticsearch_test(monkeypatch, elasticsearch_transaction_index, logging_statements)

    resp = client.post(
        "/api/v2/search/spending_by_category/awarding_subagency",
        content_type="application/json",
        data=json.dumps(
            {
                "filters": {
                    "time_period": [{"start_date": "2018-10-01", "end_date": "2020-09-30"}],
                    "agencies": [
                        {
                            "type": "awarding",
                            "tier": "subtier",
                            "name": "Awarding Subtier Agency 5",
                            "toptier_name": "bogus toptier name",
                        }
                    ],
                }
            }
        ),
        **{EXPERIMENTAL_API_HEADER: ELASTICSEARCH_HEADER_VALUE},
    )
    assert resp.status_code == status.HTTP_200_OK
    assert resp.data == {
        "category": "awarding_subagency",
        "limit": 10,
        "page_metadata": {"page": 1, "next": None, "previous": None, "hasNext": False, "hasPrevious": False},
        "results": [],
        "messages": [get_time_period_message()],
    }


def test_subset_of_fields_returned(client, monkeypatch, transaction_data,
                                   elasticsearch_transaction_index):
    logging_statements = []
    setup_elasticsearch_test(monkeypatch, elasticsearch_transaction_index,
                             logging_statements)

    fields = ["Award ID", "Recipient Name", "Mod"]

    request = {
        "filters": {
            "keyword": "test",
            "award_type_codes": ["A", "B", "C", "D"]
        },
        "fields": fields,
        "page": 1,
        "limit": 5,
        "sort": "Award ID",
        "order": "desc",
    }

    resp = client.post(ENDPOINT,
                       content_type="application/json",
                       data=json.dumps(request))

    assert resp.status_code == status.HTTP_200_OK
    assert len(resp.data["results"]) > 0
    for result in resp.data["results"]:
        for field in fields:
            assert field in result, f"Response item is missing field {field}"

        assert "internal_id" in result
        assert "generated_internal_id" in result

        assert "Last Date to Order" not in result
Ejemplo n.º 3
0
def test_columns_can_be_sorted(client, monkeypatch, transaction_data,
                               elasticsearch_transaction_index):
    logging_statements = []
    setup_elasticsearch_test(monkeypatch, elasticsearch_transaction_index,
                             logging_statements)

    fields = [
        "Action Date",
        "Award ID",
        "Awarding Agency",
        "Awarding Sub Agency",
        "Award Type",
        "Mod",
        "Recipient Name",
    ]

    request = {
        "filters": {
            "keyword": "test",
            "award_type_codes": ["A", "B", "C", "D"]
        },
        "fields": fields,
        "page": 1,
        "limit": 5,
        "order": "desc",
    }

    for field in fields:
        request["sort"] = field
        resp = client.post(ENDPOINT,
                           content_type="application/json",
                           data=json.dumps(request))
        assert resp.status_code == status.HTTP_200_OK, f"Failed to sort column: {field}"


def test_correct_response(client, monkeypatch, elasticsearch_transaction_index, awards_and_transactions):

    logging_statements = []
    setup_elasticsearch_test(monkeypatch, elasticsearch_transaction_index, logging_statements)

    resp = client.post(
        "/api/v2/search/spending_by_category/district",
        content_type="application/json",
        data=json.dumps({"filters": {"time_period": [{"start_date": "2018-10-01", "end_date": "2020-09-30"}]}}),
        **{EXPERIMENTAL_API_HEADER: ELASTICSEARCH_HEADER_VALUE},
    )
    expected_response = {
        "category": "district",
        "limit": 10,
        "page_metadata": {"page": 1, "next": None, "previous": None, "hasNext": False, "hasPrevious": False},
        "results": [
            {"amount": 500000.0, "code": "90", "id": None, "name": "SC-MULTIPLE DISTRICTS"},
            {"amount": 50005.0, "code": "10", "id": None, "name": "SC-10"},
            {"amount": 5500.0, "code": "50", "id": None, "name": "WA-50"},
            {"amount": 50.0, "code": "50", "id": None, "name": "SC-50"},
        ],
        "messages": [get_time_period_message()],
    }
    assert resp.status_code == status.HTTP_200_OK, "Failed to return 200 Response"
    assert len(logging_statements) == 1, "Expected one logging statement"
    assert resp.json() == expected_response


def test_correct_response_with_more_awards(
    client, monkeypatch, elasticsearch_transaction_index, basic_award, subagency_award
):

    logging_statements = []
    setup_elasticsearch_test(monkeypatch, elasticsearch_transaction_index, logging_statements)

    resp = client.post(
        "/api/v2/search/spending_by_category/awarding_agency",
        content_type="application/json",
        data=json.dumps({"filters": {"time_period": [{"start_date": "2018-10-01", "end_date": "2020-09-30"}]}}),
        **{EXPERIMENTAL_API_HEADER: ELASTICSEARCH_HEADER_VALUE},
    )
    expected_response = {
        "category": "awarding_agency",
        "limit": 10,
        "page_metadata": {"page": 1, "next": None, "previous": None, "hasNext": False, "hasPrevious": False},
        "results": [
            {"amount": 10.0, "name": "Awarding Toptier Agency 3", "code": "TA3", "id": 1003},
            {"amount": 5.0, "name": "Awarding Toptier Agency 1", "code": "TA1", "id": 1001},
        ],
        "messages": [get_time_period_message()],
    }
    assert resp.status_code == status.HTTP_200_OK, "Failed to return 200 Response"
    assert len(logging_statements) == 1, "Expected one logging statement"
    assert resp.json() == expected_response


def test_spending_by_transaction_count(monkeypatch, transaction_type_data, elasticsearch_transaction_index):
    logging_statements = []
    setup_elasticsearch_test(monkeypatch, elasticsearch_transaction_index, logging_statements)

    request_data = {"filters": {"keywords": ["pop tart"]}}
    results = spending_by_transaction_count(request_data)
    expected_results = {"contracts": 1, "grants": 1, "idvs": 1, "loans": 1, "direct_payments": 1, "other": 1}
    assert results == expected_results


def test_get_download_ids(monkeypatch, transaction_type_data, elasticsearch_transaction_index):
    logging_statements = []
    setup_elasticsearch_test(monkeypatch, elasticsearch_transaction_index, logging_statements)

    results = get_download_ids(["pop tart"], "transaction_id")
    transaction_ids = list(itertools.chain.from_iterable(results))
    expected_results = [1, 2, 3, 4, 5, 6]

    assert transaction_ids == expected_results


def test_success_with_all_filters(client, monkeypatch, elasticsearch_transaction_index, basic_award):
    """
    General test to make sure that all groups respond with a Status Code of 200 regardless of the filters.
    """

    logging_statements = []
    setup_elasticsearch_test(monkeypatch, elasticsearch_transaction_index, logging_statements)

    resp = client.post(
        "/api/v2/search/spending_by_category/awarding_subagency",
        content_type="application/json",
        data=json.dumps({"filters": non_legacy_filters()}),
        **{EXPERIMENTAL_API_HEADER: ELASTICSEARCH_HEADER_VALUE},
    )
    assert resp.status_code == status.HTTP_200_OK, "Failed to return 200 Response"
    assert len(logging_statements) == 1, "Expected one logging statement"


def test_correct_response(client, monkeypatch, elasticsearch_transaction_index, awards_and_transactions):

    logging_statements = []
    setup_elasticsearch_test(monkeypatch, elasticsearch_transaction_index, logging_statements)

    resp = client.post(
        "/api/v2/search/spending_by_category/recipient_duns",
        content_type="application/json",
        data=json.dumps({"filters": {"time_period": [{"start_date": "2007-10-01", "end_date": "2020-09-30"}]}}),
        **{EXPERIMENTAL_API_HEADER: ELASTICSEARCH_HEADER_VALUE},
    )
    expected_response = {
        "category": "recipient_duns",
        "limit": 10,
        "page_metadata": {"page": 1, "next": None, "previous": None, "hasNext": False, "hasPrevious": False},
        "results": [
            {
                "amount": 5000000.0,
                "code": "DUNS Number not provided",
                "name": "MULTIPLE RECIPIENTS",
                "recipient_id": None,
            },
            {"amount": 550000.0, "code": "123456789", "name": None, "recipient_id": None},
            {"amount": 5000.0, "code": "096354360", "name": "MULTIPLE RECIPIENTS", "recipient_id": None},
            {
                "amount": 500.0,
                "code": "987654321",
                "name": "RECIPIENT 3",
                "recipient_id": "d2894d22-67fc-f9cb-4005-33fa6a29ef86-C",
            },
            {"amount": 50.0, "code": "456789123", "name": "RECIPIENT 2", "recipient_id": None},
            {
                "amount": 5.0,
                "code": "DUNS Number not provided",
                "name": "RECIPIENT 1",
                "recipient_id": "5f572ec9-8b49-e5eb-22c7-f6ef316f7689-R",
            },
        ],
        "messages": [get_time_period_message()],
    }
    assert resp.status_code == status.HTTP_200_OK, "Failed to return 200 Response"
    assert len(logging_statements) == 1, "Expected one logging statement"
    assert resp.json() == expected_response


def test_a_search_endpoint(client, monkeypatch, award_data_fixture,
                           elasticsearch_transaction_index):
    """
    An example of how one might test a keyword search.
    """
    # This is the important part. This ensures data is loaded into your Elasticsearch test index.
    logging_statements = []
    setup_elasticsearch_test(monkeypatch, elasticsearch_transaction_index,
                             logging_statements)
    query = {
        "filters": {
            "keyword": "IND12PB00323",
            "award_type_codes": ["A", "B", "C", "D"]
        },
        "fields": [
            "Award ID",
            "Mod",
            "Recipient Name",
            "Action Date",
            "Transaction Amount",
            "Awarding Agency",
            "Awarding Sub Agency",
            "Award Type",
        ],
        "page":
        1,
        "limit":
        35,
        "sort":
        "Transaction Amount",
        "order":
        "desc",
    }
    response = client.post("/api/v2/search/spending_by_transaction",
                           content_type="application/json",
                           data=json.dumps(query))
    assert response.status_code == status.HTTP_200_OK
    assert len(response.data["results"]) == 1


def test_top_1_fails_with_es_transactions_routed_dangerously(client, monkeypatch, elasticsearch_transaction_index, db):
    """
    This confirms the vulnerability of high-cardinality aggregations documented in DEV-4685, which leads to
    inaccurate summing and ordering of sums when taking fewer buckets than the term cardinality.

    This is shown by manually applying a routing key (using a value stuck in the ``awards.piid`` field here as the
    routing key) on the index so that documents are distributed as below.

    NOTE: This requires an ES cluster with at least 3 shards for the transaction index. Ours should default to 5.

    Recipient  shard0   shard1   shard2   shard3   shard4
    Biz 1      $ 2.00
    Biz 1                        $ 7.00
    Biz 1                        $ 3.00
    Biz 1               $ 2.00
    Biz 1               $ 3.00
    Biz 1               $ 5.00
    Biz 2               $ 6.00
    Biz 2               $ 3.00
    Biz 2                        $ 2.00
    Biz 2                        $ 3.00
    Biz 2                        $ 4.00
    Biz 2      $13.00
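
    With shard_size=1, each shard nominates only its single largest local bucket: shard0 reports Biz 2 ($13.00),
    while shard1 and shard2 each report Biz 1 ($10.00). The coordinating node merges only those candidates, so
    Biz 1 "wins" with a partial sum of $20.00, even though the true totals are Biz 1 = $22.00 and Biz 2 = $31.00.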

    **IF THIS TEST FAILS**
        - Did our cluster structure change so that the transaction index no longer has 5 shards?
        - Did the transaction<->award DB linkage change?
        - Did we change ES version or config?
            - Investigate if Elasticsearch has changed the way they do routing or hash routing key values
    """

    # Setup data for this test
    recipient1 = uuid.uuid4()
    recipient2 = uuid.uuid4()

    # Recipient Lookup
    mommy.make("recipient.RecipientLookup", id=1, recipient_hash=recipient1, legal_business_name="Biz 1", duns="111")
    mommy.make("recipient.RecipientLookup", id=2, recipient_hash=recipient2, legal_business_name="Biz 2", duns="222")

    # Transaction FPDS
    _make_fpds_transaction(1, 1, 2.00, "2020-01-01", "111", "Biz 1")
    _make_fpds_transaction(2, 3, 7.00, "2020-02-02", "111", "Biz 1")
    _make_fpds_transaction(3, 3, 3.00, "2020-03-03", "111", "Biz 1")
    _make_fpds_transaction(4, 2, 2.00, "2020-01-02", "111", "Biz 1")
    _make_fpds_transaction(5, 2, 3.00, "2020-02-03", "111", "Biz 1")
    _make_fpds_transaction(6, 2, 5.00, "2020-03-04", "111", "Biz 1")
    _make_fpds_transaction(7, 2, 6.00, "2020-01-03", "222", "Biz 2")
    _make_fpds_transaction(8, 2, 3.00, "2020-02-04", "222", "Biz 2")
    _make_fpds_transaction(9, 3, 2.00, "2020-03-05", "222", "Biz 2")
    _make_fpds_transaction(10, 3, 3.00, "2020-01-04", "222", "Biz 2")
    _make_fpds_transaction(11, 3, 4.00, "2020-02-05", "222", "Biz 2")
    _make_fpds_transaction(12, 1, 13.00, "2020-03-06", "222", "Biz 2")

    # Awards
    # Jam a routing key value into the piid field, and use the derived piid value for routing documents to shards later
    mommy.make("awards.Award", id=1, latest_transaction_id=12, piid="shard_zero")
    mommy.make("awards.Award", id=2, latest_transaction_id=6, piid="shard_one")
    mommy.make("awards.Award", id=3, latest_transaction_id=9, piid="shard_two")

    # Push DB data into the test ES cluster
    # NOTE: Force routing of documents by the piid field, which will separate them into 3 groups, leading to an
    # inaccurate sum and ordering of sums
    logging_statements = []

    # Using piid (derived from the transaction's award) to route transaction documents to shards
    setup_elasticsearch_test(monkeypatch, elasticsearch_transaction_index, logging_statements, routing="piid")

    search = TransactionSearch()
    total = search.handle_count()
    assert total == 12, "Should have seen 12 documents indexed for this test"

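    # A "terms" agg with size=1 keeps only the single top bucket overall, and shard_size=1 makes each shard
    # nominate only its own top bucket to the coordinating node. Because each recipient's documents are spread
    # across shards here, the merged sums are partial; that is the inaccuracy being demonstrated.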
    group_by_agg = A("terms", field="recipient_hash", size=1, shard_size=1, order={"sum_agg": "desc"})
    sum_agg = A("sum", field="generated_pragmatic_obligation")
    search.aggs.bucket("results", group_by_agg).metric("sum_agg", sum_agg)

    logging.getLogger("console").debug(f"=>->=>->=>-> WILL RUN THIS ES QUERY: \n {search.extra(size=0).to_dict()}")
    response = search.extra(size=0).handle_execute().to_dict()
    results = []
    for bucket in response["aggregations"]["results"]["buckets"]:
        results.append({"key": bucket["key"], "sum": bucket["sum_agg"]["value"]})
    print(results)
    assert len(results) == 1
    assert results[0]["key"] == str(
        recipient1
    ), "This botched 'Top 1' sum agg should have incorrectly chosen the lesser recipient"
    assert results[0]["sum"] == 20.0, "The botched 'Top 1' sum agg should have incorrectly summed up recipient totals"
Ejemplo n.º 12
0
def test_top_1_with_es_transactions_routed_by_recipient(client, monkeypatch, elasticsearch_transaction_index, db):
    """
    This tests the approach used to compensate for the high-cardinality aggregation problem documented in DEV-4685,
    ensuring accuracy and completeness of aggregations and sorting even when taking fewer buckets than the term
    cardinality.

    Without the code to route indexing of transaction documents in elasticsearch to shards by the `recipient_agg_key`,
    which was added to :meth:`usaspending_api.etl.es_etl_helpers.csv_chunk_gen`, the below agg queries would lead to
    inaccurate results, as shown in DEV-4538.

    With routing by recipient, documents will be allocated to shards as below.

    Recipient  shard0   shard1   shard2   shard3   shard4
    Biz 1      $ 2.00
    Biz 1      $ 7.00
    Biz 1      $ 3.00
    Biz 1      $ 2.00
    Biz 1      $ 3.00
    Biz 1      $ 5.00
    Biz 2               $ 6.00
    Biz 2               $ 3.00
    Biz 2               $ 2.00
    Biz 2               $ 3.00
    Biz 2               $ 4.00
    Biz 2               $13.00
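
    Because routing keeps all of a recipient's documents on a single shard, each shard's local top bucket already
    carries that recipient's complete sum (shard0: Biz 1 = $22.00; shard1: Biz 2 = $31.00), so even with
    shard_size=1 the merged "Top 1" result is the correct recipient with the correct total.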

    **IF THIS TEST FAILS**
        - Are we still using the TestElasticSearchIndex fixture to help with pushing test data to ES?
        - Did TestElasticSearchIndex indexing / routing behavior change?
        - Did our cluster structure change so that the transaction index no longer has 5 shards?
        - Did the transaction<->recipient DB linkage change?
        - Did we change ES version or config?
            - Investigate if Elasticsearch has changed the way they do routing or hash routing key values
    """

    # Setup data for this test

    recipient1 = uuid.uuid4()
    recipient2 = uuid.uuid4()

    # Recipient Lookup
    mommy.make("recipient.RecipientLookup", id=1, recipient_hash=recipient1, legal_business_name="Biz 1", duns="111")
    mommy.make("recipient.RecipientLookup", id=2, recipient_hash=recipient2, legal_business_name="Biz 2", duns="222")

    # Transaction FPDS
    _make_fpds_transaction(1, 1, 2.00, "2020-01-01", "111", "Biz 1")
    _make_fpds_transaction(2, 3, 7.00, "2020-02-02", "111", "Biz 1")
    _make_fpds_transaction(3, 3, 3.00, "2020-03-03", "111", "Biz 1")
    _make_fpds_transaction(4, 2, 2.00, "2020-01-02", "111", "Biz 1")
    _make_fpds_transaction(5, 2, 3.00, "2020-02-03", "111", "Biz 1")
    _make_fpds_transaction(6, 2, 5.00, "2020-03-04", "111", "Biz 1")
    _make_fpds_transaction(7, 2, 6.00, "2020-01-03", "222", "Biz 2")
    _make_fpds_transaction(8, 2, 3.00, "2020-02-04", "222", "Biz 2")
    _make_fpds_transaction(9, 3, 2.00, "2020-03-05", "222", "Biz 2")
    _make_fpds_transaction(10, 3, 3.00, "2020-01-04", "222", "Biz 2")
    _make_fpds_transaction(11, 3, 4.00, "2020-02-05", "222", "Biz 2")
    _make_fpds_transaction(12, 1, 13.00, "2020-03-06", "222", "Biz 2")

    # Awards
    mommy.make("awards.Award", id=1, latest_transaction_id=12)
    mommy.make("awards.Award", id=2, latest_transaction_id=6)
    mommy.make("awards.Award", id=3, latest_transaction_id=9)

    # Push DB data into the test ES cluster
    logging_statements = []
    setup_elasticsearch_test(monkeypatch, elasticsearch_transaction_index, logging_statements)

    search = TransactionSearch()
    total = search.handle_count()
    assert total == 12, "Should have seen 12 documents indexed for this test"

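    # Same deliberately tight aggregation as the "dangerous" routing test above (size=1, shard_size=1). With
    # recipient-based routing, each shard's local top bucket already holds a recipient's complete sum, so the
    # merged result is exact.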
    group_by_agg = A("terms", field="recipient_hash", size=1, shard_size=1, order={"sum_agg": "desc"})
    sum_agg = A("sum", field="generated_pragmatic_obligation")
    search.aggs.bucket("results", group_by_agg).metric("sum_agg", sum_agg)

    logging.getLogger("console").debug(f"=>->=>->=>-> WILL RUN THIS ES QUERY: \n {search.extra(size=0).to_dict()}")
    response = search.extra(size=0).handle_execute().to_dict()
    results = []
    for bucket in response["aggregations"]["results"]["buckets"]:
        results.append({"key": bucket["key"], "sum": bucket["sum_agg"]["value"]})
    assert len(results) == 1
    assert results[0]["key"] == str(
        recipient2
    ), "The 'Top 1' sum agg incorrectly chose the recipient with a lesser total sum"
    assert results[0]["sum"] == 31.0, "The 'Top 1' sum agg incorrectly summed up recipient totals"