def test_reshape_filters_year_all():
    """reshape_filters(year='all') spans from the API minimum date through today."""
    filters = reshape_filters(year='all')
    today = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d')
    assert filters['time_period'][0] == {
        'start_date': settings.API_SEARCH_MIN_DATE,
        'end_date': today,
    }
def obtain_state_totals(fips, year=None, award_type_codes=None, subawards=False):
    """Look up the obligation total and distinct award count for one state.

    Args:
        fips: FIPS code used to key into VALID_FIPS for the state code
        year: fiscal-year filter forwarded to reshape_filters
        award_type_codes: optional list of award type codes to filter on
        subawards: when True, no prime-award query is run (returns None here)

    Returns:
        dict with 'pop_state_code', 'total', and 'count'; zeroed values when
        the query matches no rows
    """
    filters = reshape_filters(
        state_code=VALID_FIPS[fips]["code"],
        year=year,
        award_type_codes=award_type_codes,
    )
    if not subawards:
        queryset = (
            matview_search_filter(filters, SummaryStateView)
            .values("pop_state_code")
            .annotate(
                total=Sum("generated_pragmatic_obligation"),
                distinct_awards=StringAgg("distinct_awards", ","),
            )
            .values("distinct_awards", "pop_state_code", "total")
        )
        try:
            row = list(queryset)[0]
            return {
                "pop_state_code": row["pop_state_code"],
                "total": row["total"],
                # distinct_awards is a comma-joined string; de-dupe via set
                "count": len(set(row["distinct_awards"].split(","))),
            }
        except IndexError:
            # SQL produced 0 rows; report zeros instead of crashing.
            # logger.warn is deprecated since Python 3.3 -- use warning().
            logger.warning("No results found for FIPS {} with filters: {}".format(fips, filters))
            return {"count": 0, "pop_state_code": None, "total": 0}
def test_reshape_filters_year_latest():
    """year='latest' yields a rolling 12-month window ending today."""
    fmt = '%Y-%m-%d'
    now = datetime.datetime.now()
    expected = {
        'start_date': (now - relativedelta(years=1)).strftime(fmt),
        'end_date': now.strftime(fmt),
    }
    assert reshape_filters(year='latest')['time_period'][0] == expected
def obtain_state_totals(fips, year=None, award_type_codes=None, subawards=False):
    """Look up the obligation total and distinct award count for one state.

    Args:
        fips: FIPS code used to key into VALID_FIPS for the state code
        year: fiscal-year filter forwarded to reshape_filters
        award_type_codes: optional list of award type codes to filter on
        subawards: when True, no prime-award query is run (returns None here)

    Returns:
        dict with 'pop_state_code', 'total', and 'count'; zeroed values when
        the query matches no rows
    """
    filters = reshape_filters(
        state_code=VALID_FIPS[fips]['code'],
        year=year,
        award_type_codes=award_type_codes,
    )
    if not subawards:
        queryset = (
            matview_search_filter(filters, SummaryStateView)
            .values('pop_state_code')
            .annotate(
                total=Sum('generated_pragmatic_obligation'),
                distinct_awards=StringAgg('distinct_awards', ','),
            )
            .values('distinct_awards', 'pop_state_code', 'total')
        )
        try:
            row = list(queryset)[0]
            return {
                'pop_state_code': row['pop_state_code'],
                'total': row['total'],
                # distinct_awards is a comma-joined string; de-dupe via set
                'count': len(set(row['distinct_awards'].split(','))),
            }
        except IndexError:
            # SQL produced 0 rows; report zeros instead of crashing.
            # logger.warn is deprecated since Python 3.3 -- use warning().
            logger.warning('No results found for FIPS {} with filters: {}'.format(fips, filters))
            return {'count': 0, 'pop_state_code': None, 'total': 0}
def test_reshape_filters_year_all():
    """year='all' produces a window from the API minimum date to the current day."""
    end = datetime.datetime.now().strftime('%Y-%m-%d')
    result = reshape_filters(year='all')
    assert result['time_period'][0] == {
        'start_date': settings.API_SEARCH_MIN_DATE,
        'end_date': end,
    }
def test_reshape_filters_year_latest():
    """year='latest' covers the trailing year up to the current day."""
    current = datetime.datetime.now()
    window = {
        'start_date': datetime.datetime.strftime(current - relativedelta(years=1), '%Y-%m-%d'),
        'end_date': datetime.datetime.strftime(current, '%Y-%m-%d'),
    }
    result = reshape_filters(year='latest')
    assert result['time_period'][0] == window
def obtain_recipient_totals(recipient_id, children=False, year="latest", subawards=False):
    """ Extract the total amount and transaction count for the recipient_hash given the timeframe

        Args:
            recipient_id: string of hash(duns, name)-[recipient-level]
            children: whether or not to group by children
            year: the year the totals/counts are based on
            subawards: whether to total based on subawards
        Returns:
            list of dictionaries representing hashes and their totals/counts
    """
    if year == "latest" and children is False:
        # Simply pull the total and count from RecipientProfile
        recipient_hash = recipient_id[:-2]  # strip the "-X" level suffix
        recipient_level = recipient_id[-1]
        results = list(
            RecipientProfile.objects.filter(
                recipient_hash=recipient_hash, recipient_level=recipient_level
            )
            .annotate(total=F("last_12_months"), count=F("last_12_months_count"))
            .values("recipient_hash", "recipient_unique_id", "recipient_name", "total", "count")
        )
    else:
        filters = reshape_filters(recipient_id=recipient_id, year=year)
        # recipient_totals returns (queryset, model); the model is unused here
        queryset, _ = recipient_totals(filters)
        if children:
            # Group by the child recipients
            queryset = (
                queryset.values("recipient_hash", "recipient_unique_id", "recipient_name")
                .annotate(total=Sum("generated_pragmatic_obligation"), count=Sum("counts"))
                .values("recipient_hash", "recipient_unique_id", "recipient_name", "total", "count")
            )
            results = list(queryset)
        else:
            # Calculate the overall totals
            aggregates = queryset.aggregate(
                total=Sum("generated_pragmatic_obligation"), count=Sum("counts")
            )
            aggregates.update({"recipient_hash": recipient_id[:-2]})
            results = [aggregates]
    # Normalize NULL aggregates (no matching rows) to zero
    for result in results:
        result["count"] = result["count"] or 0
        result["total"] = result["total"] or 0
    return results
def get_all_states(year=None, award_type_codes=None, subawards=False):
    """Award totals and distinct-award counts for every US state."""
    filters = reshape_filters(year=year, award_type_codes=award_type_codes)
    if not subawards:
        # calculate award total filtered by state
        annotated = (
            matview_search_filter(filters, SummaryStateView)
            .filter(pop_state_code__isnull=False, pop_country_code='USA')
            .values('pop_state_code')
            .annotate(
                total=Sum('generated_pragmatic_obligation'),
                distinct_awards=StringAgg('distinct_awards', ','),
            )
            .values('pop_state_code', 'total', 'distinct_awards')
        )
        return [
            {
                'pop_state_code': entry['pop_state_code'],
                'total': entry['total'],
                'count': len(set(entry['distinct_awards'].split(','))),
            }
            for entry in list(annotated)
        ]
def get_all_states(year=None, award_type_codes=None, subawards=False):
    """Per-state award totals and distinct-award counts."""
    filters = reshape_filters(year=year, award_type_codes=award_type_codes)
    if not subawards:
        # calculate award total filtered by state
        state_rows = (
            matview_search_filter(filters, SummaryStateView)
            .filter(pop_state_code__isnull=False, pop_country_code="USA")
            .values("pop_state_code")
            .annotate(
                total=Sum("generated_pragmatic_obligation"),
                distinct_awards=StringAgg("distinct_awards", ","),
            )
            .values("pop_state_code", "total", "distinct_awards")
        )
        summaries = []
        for row in list(state_rows):
            summaries.append({
                "pop_state_code": row["pop_state_code"],
                "total": row["total"],
                "count": len(set(row["distinct_awards"].split(","))),
            })
        return summaries
def obtain_recipient_totals(recipient_id, children=False, year='latest', subawards=False):
    """ Extract the total amount and transaction count for the recipient_hash given the timeframe

        Args:
            recipient_id: string of hash(duns, name)-[recipient-level]
            children: whether or not to group by children
            year: the year the totals/counts are based on
            subawards: whether to total based on subawards
        Returns:
            list of dictionaries representing hashes and their totals/counts
    """
    if year == 'latest' and children is False:
        # Simply pull the total and count from RecipientProfile
        recipient_hash = recipient_id[:-2]  # strip the '-X' level suffix
        recipient_level = recipient_id[-1]
        results = list(
            RecipientProfile.objects.filter(
                recipient_hash=recipient_hash, recipient_level=recipient_level
            )
            .annotate(total=F('last_12_months'), count=F('last_12_months_count'))
            .values('recipient_hash', 'recipient_unique_id', 'recipient_name', 'total', 'count')
        )
    else:
        filters = reshape_filters(recipient_id=recipient_id, year=year)
        # recipient_totals returns (queryset, model); the model is unused here
        queryset, _ = recipient_totals(filters)
        if children:
            # Group by the child recipients
            queryset = (
                queryset.values('recipient_hash', 'recipient_unique_id', 'recipient_name')
                .annotate(total=Sum('generated_pragmatic_obligation'), count=Sum('counts'))
                .values('recipient_hash', 'recipient_unique_id', 'recipient_name', 'total', 'count')
            )
            results = list(queryset)
        else:
            # Calculate the overall totals
            aggregates = queryset.aggregate(
                total=Sum('generated_pragmatic_obligation'), count=Sum('counts')
            )
            aggregates.update({'recipient_hash': recipient_id[:-2]})
            results = [aggregates]
    # Normalize NULL aggregates (no matching rows) to zero
    for result in results:
        result['count'] = result['count'] or 0
        result['total'] = result['total'] or 0
    return results
def test_reshape_filters_duns():
    """A DUNS search text is mapped onto recipient_search_text."""
    filters = reshape_filters(duns_search_texts='012345678')
    assert filters['recipient_search_text'] == '012345678'
def test_reshape_filters_recipient_id():
    """A recipient_id passes through to the filters unchanged."""
    rid = '00000-fddfdbe-3fcsss5-9d252-d436c0ae8758c-R'
    assert reshape_filters(recipient_id=rid)['recipient_id'] == rid
def test_reshape_filters_state():
    """A state_code becomes a USA place_of_performance location filter."""
    filters = reshape_filters(state_code='AB')
    assert filters['place_of_performance_locations'][0] == {'country': 'USA', 'state': 'AB'}
def test_reshape_filters_award_type_codes():
    """award_type_codes passes through to the filters unchanged."""
    codes = ['A', 'B']
    assert reshape_filters(award_type_codes=codes)['award_type_codes'] == codes
def test_reshape_filters_year_digit():
    """A numeric year maps to that federal fiscal year's date range."""
    filters = reshape_filters(year='2017')
    # FY2017 runs 2016-10-01 through 2017-09-30
    assert filters['time_period'][0] == {'start_date': '2016-10-01', 'end_date': '2017-09-30'}
def test_reshape_filters_year_digit():
    """reshape_filters('2017') yields the FY2017 start/end dates."""
    window = {'start_date': '2016-10-01', 'end_date': '2017-09-30'}
    assert reshape_filters(year='2017')['time_period'][0] == window
def test_reshape_filters_state():
    """state_code is translated into a USA-scoped location filter."""
    location = reshape_filters(state_code='AB')['place_of_performance_locations'][0]
    assert location == {'country': 'USA', 'state': 'AB'}
def test_reshape_filters_award_type_codes():
    """The award_type_codes list is forwarded as-is."""
    type_codes = ['A', 'B']
    filters = reshape_filters(award_type_codes=type_codes)
    assert filters['award_type_codes'] == type_codes
def test_reshape_filters_duns():
    """duns_search_texts feeds the recipient_search_text filter."""
    duns_text = '012345678'
    result = reshape_filters(duns_search_texts=duns_text)
    assert result['recipient_search_text'] == duns_text
def test_reshape_filters_recipient_id():
    """recipient_id is carried through untouched."""
    hash_with_level = '00000-fddfdbe-3fcsss5-9d252-d436c0ae8758c-R'
    filters = reshape_filters(recipient_id=hash_with_level)
    assert filters['recipient_id'] == hash_with_level
def obtain_recipient_totals(recipient_id, children=False, year="latest"):
    """Extract the total amount and transaction count for the recipient_hash given the time frame

    Args:
        recipient_id: string of hash(duns, name)-[recipient-level]
        children: whether or not to group by children
        year: the year the totals/counts are based on
    Returns:
        list of dictionaries representing hashes and their totals/counts
    """
    # Build an Elasticsearch transaction query from the standard filter shape.
    filters = reshape_filters(recipient_id=recipient_id, year=year)
    filter_query = QueryWithFilters.generate_transactions_elasticsearch_query(filters)
    search = TransactionSearch().filter(filter_query)

    # Pick the aggregation key: child grouping, parent ("-P" suffix), or the
    # recipient's own hash.
    if children:
        group_by_field = "recipient_agg_key"
    elif recipient_id[-2:] == "-P":
        group_by_field = "parent_recipient_hash"
    else:
        group_by_field = "recipient_hash"

    # Size the terms aggregation to the exact number of distinct keys; bail
    # out early when there is nothing to aggregate.
    bucket_count = get_number_of_unique_terms_for_transactions(filter_query, f"{group_by_field}.hash")
    if bucket_count == 0:
        return []

    # Not setting the shard_size since the number of child recipients under a
    # parent recipient will not exceed 10k
    group_by_recipient = A("terms", field=group_by_field, size=bucket_count)
    sum_obligation = get_scaled_sum_aggregations("generated_pragmatic_obligation")["sum_field"]
    # Loans get their own sub-bucket so face-value totals/counts only cover
    # loan-type transactions.
    filter_loans = A("filter", terms={"type": list(loan_type_mapping.keys())})
    sum_face_value_loan = get_scaled_sum_aggregations("face_value_loan_guarantee")["sum_field"]

    search.aggs.bucket("group_by_recipient", group_by_recipient)
    search.aggs["group_by_recipient"].metric("sum_obligation", sum_obligation)
    search.aggs["group_by_recipient"].bucket("filter_loans", filter_loans)
    search.aggs["group_by_recipient"]["filter_loans"].metric("sum_face_value_loan", sum_face_value_loan)

    response = search.handle_execute()
    response_as_dict = response.aggs.to_dict()
    recipient_info_buckets = response_as_dict.get("group_by_recipient", {}).get("buckets", [])

    result_list = []
    for bucket in recipient_info_buckets:
        result = {}
        if children:
            # For child grouping the bucket key is a JSON-encoded agg key
            # carrying the child's hash/duns/name.
            recipient_info = json.loads(bucket.get("key"))
            hash_with_level = recipient_info.get("hash_with_level") or None
            result = {
                # Strip the "-X" level suffix to get the bare hash
                "recipient_hash": hash_with_level[:-2] if hash_with_level else None,
                "recipient_unique_id": recipient_info.get("unique_id"),
                "recipient_name": recipient_info.get("name"),
            }
        loan_info = bucket.get("filter_loans", {})
        # Sums come back scaled; dividing by Decimal("100") restores the real
        # amount (presumably matching get_scaled_sum_aggregations' scaling --
        # confirm against that helper).
        result.update({
            "total_obligation_amount": int(bucket.get("sum_obligation", {"value": 0})["value"]) / Decimal("100"),
            "total_obligation_count": bucket.get("doc_count", 0),
            "total_face_value_loan_amount": int(loan_info.get("sum_face_value_loan", {"value": 0})["value"])
            / Decimal("100"),
            "total_face_value_loan_count": loan_info.get("doc_count", 0),
        })
        result_list.append(result)
    return result_list