Example #1
    def post(self, request):
        """Return all budget function/subfunction titles matching the provided search text"""
        # TODO: check logic in name_dict[x]["aggregated_amount"] statements

        json_request = request.data
        category = json_request.get("category", None)
        scope = json_request.get("scope", None)
        filters = json_request.get("filters", None)
        limit = json_request.get("limit", 10)
        page = json_request.get("page", 1)

        lower_limit = (page - 1) * limit
        upper_limit = page * limit

        if category is None:
            raise InvalidParameterException("Missing one or more required request parameters: category")
        potential_categories = ["awarding_agency", "funding_agency", "recipient", "cfda_programs", "industry_codes"]
        if category not in potential_categories:
            raise InvalidParameterException("Category does not have a valid value")
        if (scope is None) and (category != "cfda_programs"):
            raise InvalidParameterException("Missing one or more required request parameters: scope")
        if filters is None:
            raise InvalidParameterException("Missing one or more required request parameters: filters")

        # filter queryset
        queryset = matview_search_filter(filters, UniversalTransactionView)

        filter_types = filters['award_type_codes'] if 'award_type_codes' in filters else award_type_mapping

        # filter the transactions by category
        if category == "awarding_agency":
            potential_scopes = ["agency", "subagency"]
            if scope not in potential_scopes:
                raise InvalidParameterException("scope does not have a valid value")

            if scope == "agency":
                queryset = queryset \
                    .filter(awarding_toptier_agency_name__isnull=False) \
                    .values(
                        agency_name=F('awarding_toptier_agency_name'),
                        agency_abbreviation=F('awarding_toptier_agency_abbreviation'))

            elif scope == "subagency":
                queryset = queryset \
                    .filter(
                        awarding_subtier_agency_name__isnull=False) \
                    .values(
                        agency_name=F('awarding_subtier_agency_name'),
                        agency_abbreviation=F('awarding_subtier_agency_abbreviation'))

            elif scope == "office":
                    # NOT IMPLEMENTED IN UI
                    raise NotImplementedError

            queryset = sum_transaction_amount(queryset, 'aggregated_amount', filter_types=filter_types)\
                .order_by('-aggregated_amount')
            results = list(queryset[lower_limit:upper_limit + 1])

            page_metadata = get_simple_pagination_metadata(len(results), limit, page)
            results = results[:limit]

            response = {"category": category, "scope": scope, "limit": limit, "results": results,
                        "page_metadata": page_metadata}
            return Response(response)

        elif category == "funding_agency":
            potential_scopes = ["agency", "subagency"]
            if scope not in potential_scopes:
                raise InvalidParameterException("scope does not have a valid value")

            if scope == "agency":
                queryset = queryset \
                    .filter(funding_toptier_agency_name__isnull=False) \
                    .values(
                        agency_name=F('funding_toptier_agency_name'),
                        agency_abbreviation=F('funding_toptier_agency_abbreviation'))

            elif scope == "subagency":
                queryset = queryset \
                    .filter(
                        funding_subtier_agency_name__isnull=False) \
                    .values(
                        agency_name=F('funding_subtier_agency_name'),
                        agency_abbreviation=F('funding_subtier_agency_abbreviation'))

            elif scope == "office":
                # NOT IMPLEMENTED IN UI
                raise NotImplementedError

            queryset = sum_transaction_amount(queryset, 'aggregated_amount', filter_types=filter_types) \
                .order_by('-aggregated_amount')
            results = list(queryset[lower_limit:upper_limit + 1])

            page_metadata = get_simple_pagination_metadata(len(results), limit, page)
            results = results[:limit]

            response = {"category": category, "scope": scope, "limit": limit, "results": results,
                        "page_metadata": page_metadata}
            return Response(response)

        elif category == "recipient":
            if scope == "duns":
                queryset = queryset \
                    .values(legal_entity_id=F("recipient_id"))
                queryset = sum_transaction_amount(queryset, 'aggregated_amount', filter_types=filter_types) \
                    .order_by('-aggregated_amount') \
                    .values("aggregated_amount", "legal_entity_id", "recipient_name") \
                    .order_by("-aggregated_amount")

                # Begin DB hits here
                results = list(queryset[lower_limit:upper_limit + 1])

                page_metadata = get_simple_pagination_metadata(len(results), limit, page)
                results = results[:limit]

            elif scope == "parent_duns":
                queryset = queryset \
                    .filter(parent_recipient_unique_id__isnull=False)
                queryset = sum_transaction_amount(queryset, 'aggregated_amount', filter_types=filter_types,
                                                  calculate_totals=False) \
                    .values(
                        'aggregated_amount',
                        'recipient_name',
                        'parent_recipient_unique_id') \
                    .order_by('-aggregated_amount')

                # Begin DB hits here
                results = list(queryset[lower_limit:upper_limit + 1])
                page_metadata = get_simple_pagination_metadata(len(results), limit, page)
                results = results[:limit]

            else:  # recipient_type
                raise InvalidParameterException("recipient type is not yet implemented")

            response = {"category": category, "scope": scope, "limit": limit, "results": results,
                        "page_metadata": page_metadata}
            return Response(response)

        elif category == "cfda_programs":
            if can_use_view(filters, 'SummaryCfdaNumbersView'):
                queryset = get_view_queryset(filters, 'SummaryCfdaNumbersView')
                queryset = queryset \
                    .filter(
                        federal_action_obligation__isnull=False,
                        cfda_number__isnull=False) \
                    .values(cfda_program_number=F("cfda_number"))
                queryset = sum_transaction_amount(queryset, 'aggregated_amount', filter_types=filter_types) \
                    .values(
                        "aggregated_amount",
                        "cfda_program_number",
                        program_title=F("cfda_title")) \
                    .order_by('-aggregated_amount')

                # Begin DB hits here
                results = list(queryset[lower_limit:upper_limit + 1])
                page_metadata = get_simple_pagination_metadata(len(results), limit, page)
                results = results[:limit]
                for trans in results:
                    trans['popular_name'] = None
                    # small DB hit every loop here
                    cfda = Cfda.objects \
                        .filter(
                            program_title=trans['program_title'],
                            program_number=trans['cfda_program_number']) \
                        .values('popular_name').first()

                    if cfda:
                        trans['popular_name'] = cfda['popular_name']

            else:
                queryset = queryset \
                    .filter(
                        cfda_number__isnull=False) \
                    .values(cfda_program_number=F("cfda_number"))
                queryset = sum_transaction_amount(queryset, 'aggregated_amount', filter_types=filter_types) \
                    .values(
                        "aggregated_amount",
                        "cfda_program_number",
                        popular_name=F("cfda_popular_name"),
                        program_title=F("cfda_title")) \
                    .order_by('-aggregated_amount')

                # Begin DB hits here
                results = list(queryset[lower_limit:upper_limit + 1])
                page_metadata = get_simple_pagination_metadata(len(results), limit, page)
                results = results[:limit]

            response = {"category": category, "limit": limit, "results": results, "page_metadata": page_metadata}
            return Response(response)

        elif category == "industry_codes":  # industry_codes
            if scope == "psc":
                if can_use_view(filters, 'SummaryPscCodesView'):
                    queryset = get_view_queryset(filters, 'SummaryPscCodesView')
                    queryset = queryset \
                        .filter(product_or_service_code__isnull=False) \
                        .values(psc_code=F("product_or_service_code"))
                else:
                    queryset = queryset \
                        .filter(psc_code__isnull=False) \
                        .values("psc_code")

                queryset = sum_transaction_amount(queryset, 'aggregated_amount', filter_types=filter_types) \
                    .order_by('-aggregated_amount')
                # Begin DB hits here
                results = list(queryset[lower_limit:upper_limit + 1])

                page_metadata = get_simple_pagination_metadata(len(results), limit, page)
                results = results[:limit]

                response = {"category": category, "scope": scope, "limit": limit, "results": results,
                            "page_metadata": page_metadata}
                return Response(response)

            elif scope == "naics":
                if can_use_view(filters, 'SummaryNaicsCodesView'):
                    queryset = get_view_queryset(filters, 'SummaryNaicsCodesView')
                    queryset = queryset \
                        .filter(naics_code__isnull=False) \
                        .values('naics_code')
                    queryset = sum_transaction_amount(queryset, 'aggregated_amount', filter_types=filter_types) \
                        .order_by('-aggregated_amount') \
                        .values(
                            'naics_code',
                            'aggregated_amount',
                            'naics_description')
                else:
                    queryset = queryset \
                        .filter(naics_code__isnull=False) \
                        .values("naics_code")
                    queryset = sum_transaction_amount(queryset, 'aggregated_amount', filter_types=filter_types) \
                        .order_by('-aggregated_amount') \
                        .values(
                            'naics_code',
                            'aggregated_amount',
                            'naics_description')

                # Begin DB hits here
                results = list(queryset[lower_limit:upper_limit + 1])

                page_metadata = get_simple_pagination_metadata(len(results), limit, page)
                results = results[:limit]

                response = {"category": category, "scope": scope, "limit": limit, "results": results,
                            "page_metadata": page_metadata}
                return Response(response)

            else:
                raise InvalidParameterException("scope does not have a valid value")
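
The category branches above all share one pagination idiom: the slice fetches limit + 1 rows so the helper can report whether a next page exists without a separate COUNT query. A minimal sketch of that idiom, using a hypothetical stand-in for get_simple_pagination_metadata (field names assumed, not the project's real helper):

# Hypothetical stand-in for get_simple_pagination_metadata; the real helper is
# imported from the project's pagination utilities and may return more fields.
def simple_pagination_metadata(fetched_count, limit, page):
    has_next = fetched_count > limit          # the over-fetched row, if any
    has_previous = page > 1
    return {
        "page": page,
        "hasNext": has_next,
        "hasPrevious": has_previous,
        "next": page + 1 if has_next else None,
        "previous": page - 1 if has_previous else None,
    }

# With page=2 and limit=10 the slice asks for rows 10..20 (11 rows); the extra
# row only signals that a third page exists and is trimmed afterwards.
rows = list(range(25))                        # stand-in for the filtered queryset
page, limit = 2, 10
lower_limit, upper_limit = (page - 1) * limit, page * limit
results = rows[lower_limit:upper_limit + 1]   # limit + 1 rows fetched
page_metadata = simple_pagination_metadata(len(results), limit, page)
results = results[:limit]                     # back to the requested page size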
    def get_queryset(self):
        # retrieve the request's query parameters
        json_request = self.request.query_params

        # Retrieve fiscal_year & awarding_agency_id from Request
        fiscal_year = json_request.get('fiscal_year')
        awarding_agency_id = json_request.get('awarding_agency_id')

        # Optional Award Category
        award_category = json_request.get('award_category')

        # Required Query Parameters were not Provided
        if not (fiscal_year and awarding_agency_id):
            raise InvalidParameterException(
                'Missing one or more required query parameters: fiscal_year, awarding_agency_id'
            )
        if not check_valid_toptier_agency(awarding_agency_id):
            raise InvalidParameterException(
                'Awarding Agency ID provided must correspond to a toptier agency'
            )

        toptier_agency = Agency.objects.filter(
            id=awarding_agency_id).first().toptier_agency
        queryset = TransactionNormalized.objects.filter(
            federal_action_obligation__isnull=False)

        # DS-1655: if the AID is "097" (DOD), Include the branches of the military in the queryset
        if toptier_agency.cgac_code == DOD_CGAC:
            tta_list = DOD_ARMED_FORCES_CGAC
            queryset = queryset.filter(
                # Filter based on fiscal_year and the awarding agency's CGAC code(s)
                fiscal_year=fiscal_year,
                awarding_agency__toptier_agency__cgac_code__in=tta_list)
        else:
            queryset = queryset.filter(
                # Filter based on fiscal_year and the awarding agency's CGAC code
                fiscal_year=fiscal_year,
                awarding_agency__toptier_agency__cgac_code=toptier_agency.cgac_code)

        queryset = queryset.annotate(
            award_category=F('award__category'),
            recipient_id=F('recipient__legal_entity_id'),
            recipient_name=F('recipient__recipient_name'))

        if award_category is not None:
            # Filter based on award_category
            if award_category != "other":
                queryset = queryset.filter(award_category=award_category)
            else:
                queryset = queryset.filter(Q(award_category='insurance') | Q(award_category='other')).\
                    annotate(award_category=Case(When(award_category='insurance', then=Value('other')),
                                                 output_field=CharField())
                             )

        # Sum Obligations for each Recipient
        queryset = queryset.values(
            'award_category', 'recipient_id',
            'recipient_name').annotate(obligated_amount=Sum(
                'federal_action_obligation')).order_by('-obligated_amount')

        return queryset
def type_filter(_type, filters, limit=None):
    fiscal_year = None
    fiscal_quarter = None
    fiscal_date = None

    _types = [
        "budget_function",
        "budget_subfunction",
        "federal_account",
        "program_activity",
        "object_class",
        "recipient",
        "award",
        "award_category",
        "agency",
        "agency_type",
        "agency_sub",
    ]

    # Validate explorer _type
    if _type is None:
        raise InvalidParameterException(
            'Missing Required Request Parameter, "type": "type"')

    elif _type not in _types:
        raise InvalidParameterException(
            "Type does not have a valid value. "
            "Valid Types: budget_function, budget_subfunction, federal_account, program_activity,"
            "object_class, recipient, award, award_category agency, agency_type, agency_sub"
        )

    if filters is None:
        raise InvalidParameterException(
            'Missing Required Request Parameter, "filters": { "filter_options" }'
        )

    # Get fiscal_date and fiscal_quarter
    for key, value in filters.items():
        if key == "fy":
            try:
                fiscal_year = int(value)
                if fiscal_year < 1000 or fiscal_year > 9999:
                    raise InvalidParameterException(
                        'Incorrect Fiscal Year Parameter, "fy": "YYYY"')
            except ValueError:
                raise InvalidParameterException(
                    'Incorrect or Missing Fiscal Year Parameter, "fy": "YYYY"')
        elif key == "quarter":
            if value in ("1", "2", "3", "4"):
                fiscal_quarter = int(value)
            else:
                raise InvalidParameterException(
                    "Incorrect value provided for quarter parameter. Must be a string between 1 and 4"
                )

    if fiscal_year:
        fiscal_date, fiscal_quarter = generate_last_completed_fiscal_quarter(
            fiscal_year=fiscal_year, fiscal_quarter=fiscal_quarter)

    # Recipient, Award Queryset
    alt_set = (
        FinancialAccountsByAwards.objects.all()
        .exclude(transaction_obligated_amount__isnull=True)
        .exclude(transaction_obligated_amount="NaN")
        .filter(submission__reporting_fiscal_quarter=fiscal_quarter)
        .filter(submission__reporting_fiscal_year=fiscal_year)
        .annotate(amount=Sum("transaction_obligated_amount"))
    )

    # Base Queryset
    queryset = (
        FinancialAccountsByProgramActivityObjectClass.objects.all()
        .exclude(obligations_incurred_by_program_object_class_cpe__isnull=True)
        .filter(submission__reporting_fiscal_quarter=fiscal_quarter)
        .filter(submission__reporting_fiscal_year=fiscal_year)
        .annotate(amount=Sum("obligations_incurred_by_program_object_class_cpe"))
    )

    # Apply filters to queryset results
    alt_set, queryset = spending_filter(alt_set, queryset, filters, _type)

    if _type == "recipient" or _type == "award" or _type == "award_category" or _type == "agency_type":
        # Annotate and get explorer _type filtered results
        exp = Explorer(alt_set, queryset)

        if _type == "recipient":
            alt_set = exp.recipient()
        if _type == "award":
            alt_set = exp.award()
        if _type == "award_category":
            alt_set = exp.award_category()

        # Total value of filtered results
        actual_total = 0

        for award in alt_set:
            award["id"] = str(award["id"])
            if _type in ["award", "award_category"]:
                code = None
                for code_type in ("piid", "fain", "uri"):
                    if award[code_type]:
                        code = award[code_type]
                        break
                for code_type in ("piid", "fain", "uri"):
                    del award[code_type]
                award["code"] = code
                if _type == "award":
                    award["name"] = code
            actual_total += award["total"]

        alt_set = alt_set[:limit] if _type == "award" else alt_set

        results = {
            "total": actual_total,
            "end_date": fiscal_date,
            "results": alt_set
        }

    else:
        # Annotate and get explorer _type filtered results
        exp = Explorer(alt_set, queryset)

        if _type == "budget_function":
            queryset = exp.budget_function()
        if _type == "budget_subfunction":
            queryset = exp.budget_subfunction()
        if _type == "federal_account":
            queryset = exp.federal_account()
        if _type == "program_activity":
            queryset = exp.program_activity()
        if _type == "object_class":
            queryset = exp.object_class()
        if _type == "agency":
            queryset = exp.agency()

        # Actual total value of filtered results
        actual_total = queryset.aggregate(total=Sum(
            "obligations_incurred_by_program_object_class_cpe"))["total"]

        result_set, expected_total = get_unreported_data_obj(
            queryset=queryset,
            filters=filters,
            limit=limit,
            spending_type=_type,
            actual_total=actual_total,
            fiscal_year=fiscal_year,
            fiscal_quarter=fiscal_quarter,
        )

        results = {
            "total": expected_total,
            "end_date": fiscal_date,
            "results": result_set
        }

    return results
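
A hedged usage sketch for type_filter; the "fy" and "quarter" keys are the ones the function inspects above, and the keys of the returned dict mirror the results dict it builds (the shape of the individual rows depends on the Explorer helpers and is not shown here):

# Spending-explorer drill-down by budget function for Q2 of FY2019.
# Both filter values are strings, as the validation above requires.
spending = type_filter("budget_function", {"fy": "2019", "quarter": "2"})

spending["total"]     # aggregate obligated amount across the filtered rows
spending["end_date"]  # last day of the completed fiscal quarter that was used
spending["results"]   # one row per budget function (shape defined by the Explorer helpers)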
Example #4
def validate_year(year=None):
    if year and not (year.isdigit() or year in ['all', 'latest']):
        raise InvalidParameterException('Invalid year: {}.'.format(year))
    return year
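
Since the value comes straight from a query string it is always a string (or absent); for illustration:

validate_year("2017")    # returns "2017" (all digits)
validate_year("all")     # returns "all"
validate_year("latest")  # returns "latest"
validate_year(None)      # returns None (no year supplied)
validate_year("201x")    # raises InvalidParameterException("Invalid year: 201x.")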
Example #5
    def get_queryset(self):
        # retrieve the request's query parameters
        json_request = self.request.query_params

        # Retrieve fiscal_year & awarding_agency_id from Request
        fiscal_year = json_request.get("fiscal_year")
        awarding_agency_id = json_request.get("awarding_agency_id")

        # Optional Award Category
        award_category = json_request.get("award_category")

        # Required Query Parameters were not Provided
        if not (fiscal_year and awarding_agency_id):
            raise InvalidParameterException(
                "Missing one or more required query parameters: fiscal_year, awarding_agency_id"
            )
        if not check_valid_toptier_agency(awarding_agency_id):
            raise InvalidParameterException(
                "Awarding Agency ID provided must correspond to a toptier agency"
            )

        toptier_agency = Agency.objects.filter(
            id=awarding_agency_id).first().toptier_agency
        queryset = TransactionNormalized.objects.filter(
            federal_action_obligation__isnull=False)

        # DS-1655: if the AID is "097" (DOD), Include the branches of the military in the queryset
        if toptier_agency.toptier_code == DOD_CGAC:
            tta_list = DOD_ARMED_FORCES_CGAC
            queryset = queryset.filter(
                # Filter based on fiscal_year and the awarding agency's toptier code(s)
                fiscal_year=fiscal_year,
                awarding_agency__toptier_agency__toptier_code__in=tta_list,
            )
        else:
            queryset = queryset.filter(
                # Filter based on fiscal_year and the awarding agency's toptier code
                fiscal_year=fiscal_year,
                awarding_agency__toptier_agency__toptier_code=toptier_agency.toptier_code,
            )

        queryset = queryset.annotate(
            award_category=F("award__category"),
            recipient_name=Coalesce(
                F("award__latest_transaction__assistance_data__awardee_or_recipient_legal"),
                F("award__latest_transaction__contract_data__awardee_or_recipient_legal"),
            ),
        )

        if award_category is not None:
            # Filter based on award_category
            if award_category != "other":
                queryset = queryset.filter(award_category=award_category)
            else:
                queryset = queryset.filter(
                    Q(award_category="insurance")
                    | Q(award_category="other")).annotate(award_category=Case(
                        When(award_category="insurance", then=Value("other")),
                        output_field=CharField()))

        # Sum Obligations for each Recipient
        queryset = (queryset.values(
            "award_category", "recipient_name").annotate(obligated_amount=Sum(
                "federal_action_obligation")).order_by("-obligated_amount"))

        return queryset
Example #6
def subaward_filter(filters, for_downloads=False):

    queryset = SubawardView.objects.all()

    recipient_scope_q = Q(recipient_location_country_code="USA") | Q(
        recipient_location_country_name="UNITED STATES")
    pop_scope_q = Q(pop_country_code="USA") | Q(
        pop_country_name="UNITED STATES")

    for key, value in filters.items():

        if value is None:
            raise InvalidParameterException("Invalid filter: " + key +
                                            " has null as its value.")

        key_list = [
            "keywords",
            "elasticsearch_keyword",
            "time_period",
            "award_type_codes",
            "prime_and_sub_award_types",
            "agencies",
            "legal_entities",
            "recipient_search_text",
            "recipient_scope",
            "recipient_locations",
            "recipient_type_names",
            "place_of_performance_scope",
            "place_of_performance_locations",
            "award_amounts",
            "award_ids",
            "program_numbers",
            "naics_codes",
            "psc_codes",
            "contract_pricing_type_codes",
            "set_aside_type_codes",
            "extent_competed_type_codes",
            "tas_codes",
        ]

        if key not in key_list:
            raise InvalidParameterException("Invalid filter: " + key +
                                            " does not exist.")

        if key == "keywords":

            def keyword_parse(keyword):
                # keyword_ts_vector & award_ts_vector are Postgres TS_vectors.
                # keyword_ts_vector = recipient_name + psc_description + subaward_description
                # award_ts_vector = piid + fain + uri + subaward_number
                filter_obj = Q(keyword_ts_vector=keyword) | Q(
                    award_ts_vector=keyword)
                # Commenting out until NAICS is associated with subawards in DAIMS 1.3.1
                # if keyword.isnumeric():
                #     filter_obj |= Q(naics_code__contains=keyword)
                if len(keyword) == 4 and PSC.objects.filter(
                        code__iexact=keyword).exists():
                    filter_obj |= Q(product_or_service_code__iexact=keyword)

                return filter_obj

            filter_obj = Q()
            for keyword in value:
                filter_obj |= keyword_parse(keyword)
            potential_duns = [x for x in value if 7 < len(x) < 10]
            if len(potential_duns) > 0:
                filter_obj |= Q(recipient_unique_id__in=potential_duns) | Q(
                    parent_recipient_unique_id__in=potential_duns)

            queryset = queryset.filter(filter_obj)

        elif key == "elasticsearch_keyword":
            keyword = value
            transaction_ids = elasticsearch_helper.get_download_ids(
                keyword=keyword, field="transaction_id")
            # flatten IDs
            transaction_ids = list(
                itertools.chain.from_iterable(transaction_ids))
            logger.info("Found {} transactions based on keyword: {}".format(
                len(transaction_ids), keyword))
            transaction_ids = [
                str(transaction_id) for transaction_id in transaction_ids
            ]
            queryset = queryset.filter(latest_transaction_id__isnull=False)

            # Prepare a SQL snippet to include in the predicate for searching an array of transaction IDs
            sql_fragment = '"subaward_view"."latest_transaction_id" = ANY(\'{{{}}}\'::int[])'  # int[] -> int array type
            queryset = queryset.extra(
                where=[sql_fragment.format(",".join(transaction_ids))])

        elif key == "time_period":
            min_date = API_SEARCH_MIN_DATE
            if for_downloads:
                min_date = API_MIN_DATE
            queryset &= combine_date_range_queryset(value, SubawardView,
                                                    min_date, API_MAX_DATE)

        elif key == "award_type_codes":
            queryset = queryset.filter(prime_award_type__in=value)

        elif key == "prime_and_sub_award_types":
            award_types = value.get("sub_awards")
            if award_types:
                queryset = queryset.filter(award_type__in=award_types)

        elif key == "agencies":
            # TODO: Make function to match agencies in award filter throwing dupe error
            funding_toptier = Q()
            funding_subtier = Q()
            awarding_toptier = Q()
            awarding_subtier = Q()
            for v in value:
                type = v["type"]
                tier = v["tier"]
                name = v["name"]
                if type == "funding":
                    if tier == "toptier":
                        funding_toptier |= Q(funding_toptier_agency_name=name)
                    elif tier == "subtier":
                        if "toptier_name" in v:
                            funding_subtier |= Q(
                                funding_subtier_agency_name=name
                            ) & Q(
                                funding_toptier_agency_name=v["toptier_name"])
                        else:
                            funding_subtier |= Q(
                                funding_subtier_agency_name=name)

                elif type == "awarding":
                    if tier == "toptier":
                        awarding_toptier |= Q(
                            awarding_toptier_agency_name=name)
                    elif tier == "subtier":
                        if "toptier_name" in v:
                            awarding_subtier |= Q(
                                awarding_subtier_agency_name=name
                            ) & Q(
                                awarding_toptier_agency_name=v["toptier_name"])
                        else:
                            awarding_subtier |= Q(
                                awarding_subtier_agency_name=name)

            awarding_queryfilter = Q()
            funding_queryfilter = Q()

            # Since these are Q filters, no DB hits for boolean checks
            if funding_toptier:
                funding_queryfilter |= funding_toptier
            if funding_subtier:
                funding_queryfilter |= funding_subtier
            if awarding_toptier:
                awarding_queryfilter |= awarding_toptier
            if awarding_subtier:
                awarding_queryfilter |= awarding_subtier

            queryset = queryset.filter(funding_queryfilter
                                       & awarding_queryfilter)

        elif key == "legal_entities":
            # This filter key has effectively become obsolete by recipient_search_text
            msg = 'API request included "{}" key. No filtering will occur with provided value "{}"'
            logger.info(msg.format(key, value))

        elif key == "recipient_search_text":

            def recip_string_parse(recipient_string):
                upper_recipient_string = recipient_string.upper()

                # recipient_name_ts_vector is a postgres TS_Vector
                filter_obj = Q(recipient_name_ts_vector=upper_recipient_string)
                if len(upper_recipient_string) == 9 and upper_recipient_string[:5].isnumeric():
                    filter_obj |= Q(recipient_unique_id=upper_recipient_string)
                return filter_obj

            filter_obj = Q()
            for recipient in value:
                filter_obj |= recip_string_parse(recipient)
            queryset = queryset.filter(filter_obj)

        elif key == "recipient_scope":
            if value == "domestic":
                queryset = queryset.filter(recipient_scope_q)
            elif value == "foreign":
                queryset = queryset.exclude(recipient_scope_q)
            else:
                raise InvalidParameterException(
                    "Invalid filter: recipient_scope type is invalid.")

        elif key == "recipient_locations":
            queryset = queryset.filter(
                geocode_filter_locations("recipient_location", value))

        elif key == "recipient_type_names":
            if len(value) != 0:
                queryset = queryset.filter(business_categories__overlap=value)

        elif key == "place_of_performance_scope":
            if value == "domestic":
                queryset = queryset.filter(pop_scope_q)
            elif value == "foreign":
                queryset = queryset.exclude(pop_scope_q)
            else:
                raise InvalidParameterException(
                    "Invalid filter: place_of_performance_scope is invalid.")

        elif key == "place_of_performance_locations":
            queryset = queryset.filter(geocode_filter_locations("pop", value))

        elif key == "award_amounts":
            queryset &= total_obligation_queryset(value, SubawardView, filters)

        elif key == "award_ids":
            queryset = build_award_ids_filter(queryset, value,
                                              ("piid", "fain"))

        # add "naics_codes" (column naics) after NAICS are mapped to subawards
        elif key in ("program_numbers", "psc_codes",
                     "contract_pricing_type_codes"):
            filter_to_col = {
                "program_numbers": "cfda_number",
                "psc_codes": "product_or_service_code",
                "contract_pricing_type_codes": "type_of_contract_pricing",
            }
            in_query = [v for v in value]
            if len(in_query) != 0:
                queryset &= SubawardView.objects.filter(
                    **{"{}__in".format(filter_to_col[key]): in_query})

        elif key in ("set_aside_type_codes", "extent_competed_type_codes"):
            or_queryset = Q()
            filter_to_col = {
                "set_aside_type_codes": "type_set_aside",
                "extent_competed_type_codes": "extent_competed"
            }
            in_query = [v for v in value]
            for v in value:
                or_queryset |= Q(
                    **{"{}__exact".format(filter_to_col[key]): in_query})
            queryset = queryset.filter(or_queryset)

        elif key == "tas_codes":
            queryset = build_tas_codes_filter(queryset, value)

    return queryset
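
A hedged example of calling subaward_filter; the keys come from key_list above and the values are illustrative rather than a full schema:

# Domestic sub-awards under a couple of prime award types, narrowed by recipient text.
filters = {
    "award_type_codes": ["A", "B"],          # prime award type codes (illustrative)
    "recipient_scope": "domestic",           # "domestic" or "foreign"
    "recipient_search_text": ["ACME CORP"],  # names and/or 9-character DUNS strings
}
subawards = subaward_filter(filters)         # a filtered SubawardView queryset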
Example #7
def validate_year(year=None):
    if year and not (year.isdigit() or year in ["all", "latest"]):
        raise InvalidParameterException("Invalid year: {}.".format(year))
    return year
Example #8
def validate_award_request(request_data):
    """Analyze request and raise any formatting errors as Exceptions"""

    _validate_required_parameters(request_data, ["award_levels", "filters"])
    filters = _validate_filters(request_data)
    award_levels = _validate_award_levels(request_data)

    json_request = {"download_types": award_levels, "filters": {}}

    # Set defaults of non-required parameters
    json_request["agency"] = request_data["filters"]["agency"] if request_data[
        "filters"].get("agency") else "all"
    json_request["columns"] = request_data.get("columns", [])
    json_request["file_format"] = str(request_data.get("file_format",
                                                       "csv")).lower()

    check_types_and_assign_defaults(filters, json_request["filters"],
                                    SHARED_AWARD_FILTER_DEFAULTS)

    json_request["filters"]["award_type_codes"] = _validate_award_type_codes(
        filters)

    _validate_and_update_locations(filters, json_request)
    _validate_tas_codes(filters, json_request)
    _validate_file_format(json_request)

    # Overriding all other filters if the keyword filter is provided in year-constraint download
    # Make sure this is after checking the award_levels
    constraint_type = request_data.get("constraint_type")
    if constraint_type == "year" and "elasticsearch_keyword" in filters:
        json_request["filters"] = {
            "elasticsearch_keyword": filters["elasticsearch_keyword"],
            "award_type_codes": list(award_type_mapping.keys()),
        }
        json_request["limit"] = settings.MAX_DOWNLOAD_LIMIT
        return json_request

    # Validate time periods
    total_range_count = validate_time_periods(filters, json_request)

    if constraint_type == "row_count":
        # Validate limit exists and is below MAX_DOWNLOAD_LIMIT
        json_request["limit"] = parse_limit(request_data)

        # Validate row_count-constrained filter types and assign defaults
        check_types_and_assign_defaults(filters, json_request["filters"],
                                        ROW_CONSTRAINT_FILTER_DEFAULTS)
    elif constraint_type == "year":
        # Validate combined total dates within one year (allow for leap years)
        if total_range_count > 366:
            raise InvalidParameterException(
                "Invalid Parameter: time_period total days must be within a year"
            )

        # Validate year-constrained filter types and assign defaults
        check_types_and_assign_defaults(filters, json_request["filters"],
                                        YEAR_CONSTRAINT_FILTER_DEFAULTS)
    else:
        raise InvalidParameterException(
            'Invalid parameter: constraint_type must be "row_count" or "year"')

    return json_request
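
A sketch of a request body this validator accepts; only "award_levels" and "filters" are strictly required, the other keys shown are the optional ones read above, and the specific values are illustrative:

request_data = {
    "award_levels": ["prime_awards"],   # must pass _validate_award_levels (value illustrative)
    "constraint_type": "row_count",     # "row_count" or "year"
    "limit": 500,                       # checked by parse_limit for row_count downloads
    "file_format": "csv",               # optional; defaults to "csv"
    "filters": {
        "agency": "all",                # defaults to "all" when omitted
        "award_type_codes": ["A", "B"],
        "time_period": [{"start_date": "2019-10-01", "end_date": "2020-09-30"}],
    },
}
json_request = validate_award_request(request_data)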
Example #9
def _validate_required_parameters(request_data, required_parameters):
    for required_param in required_parameters:
        if required_param not in request_data:
            raise InvalidParameterException(
                "Missing one or more required body parameters: {}".format(
                    required_param))
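
The helper only checks key presence, not the values; for example:

_validate_required_parameters({"filters": {}}, ["award_levels", "filters"])
# raises InvalidParameterException("Missing one or more required body parameters: award_levels")

_validate_required_parameters({"award_levels": [], "filters": {}}, ["award_levels", "filters"])
# passes silently; both keys exist even though their values are empty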
Example #10
    def post(self, request):
        """Return list of downloads that match the requested params"""
        response_data = {}

        post_data = request.data
        agency_id = post_data.get('agency', None)
        fiscal_year = post_data.get('fiscal_year', None)
        download_type = post_data.get('type', None)

        required_params = {
            'agency': agency_id,
            'fiscal_year': fiscal_year,
            'type': download_type
        }
        for required_param, param_value in required_params.items():
            if param_value is None:
                raise InvalidParameterException(
                    'Missing one or more required query parameters: {}'.format(
                        required_param))

        # Populate regex
        fiscal_year_regex = str(fiscal_year) if fiscal_year else r'\d{4}'
        download_type_regex = download_type.capitalize() if download_type else '(Contracts|Assistance)'

        cgac_regex = '.*'
        if agency_id and agency_id == 'all':
            cgac_regex = 'all'
        elif agency_id:
            cgac_codes = ToptierAgency.objects.filter(
                toptier_agency_id=agency_id).values('cgac_code')
            if cgac_codes:
                cgac_regex = cgac_codes[0]['cgac_code']
            else:
                raise InvalidParameterException(
                    '{} agency not found'.format(agency_id))
        monthly_dl_regex = r'{}_{}_{}_Full_.*\.zip'.format(
            fiscal_year_regex, cgac_regex, download_type_regex)

        # Generate regex possible prefix
        prefixes = []
        for regex, add_regex in [(fiscal_year_regex, fiscal_year),
                                 (cgac_regex, agency_id),
                                 (download_type_regex, download_type)]:
            if not add_regex:
                break
            prefixes.append(regex)
        prefix = '_'.join(prefixes)

        # Get and filter the files we need
        bucket_name = self.s3_handler.bucketRoute
        region_name = S3Handler.REGION
        bucket = boto.s3.connect_to_region(region_name).get_bucket(bucket_name)
        monthly_dls_names = list(
            filter(
                re.compile(monthly_dl_regex).search,
                [key.name for key in bucket.list(prefix=prefix)]))
        # Generate response
        downloads = []
        for name in monthly_dls_names:
            name_data = re.findall(r'(.*)_(.*)_(.*)_Full_(.*)\.zip', name)[0]
            agency_name = None
            agency_abbr = None
            agency_cgac = name_data[1]
            if agency_cgac != 'all':
                agency = ToptierAgency.objects.filter(
                    cgac_code=agency_cgac).values('name', 'abbreviation')
                if agency:
                    agency_name = agency[0]['name']
                    agency_abbr = agency[0]['abbreviation']
            else:
                agency_name = 'All'
            # Simply adds dashes for the date, 20180101 -> 2018-01-01, could also use strftime
            updated_date = '-'.join(
                [name_data[3][:4], name_data[3][4:6], name_data[3][6:]])
            downloads.append({
                'fiscal_year': name_data[0],
                'agency_name': agency_name,
                'agency_acronym': agency_abbr,
                'type': name_data[2].lower(),
                'updated_date': updated_date,
                'file_name': name,
                'url': self.s3_handler.get_simple_url(file_name=name)
            })
        response_data['monthly_files'] = downloads
        return Response(response_data)
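
The monthly file names follow a {fiscal_year}_{cgac}_{type}_Full_{YYYYMMDD}.zip pattern, which is what both the search regex and the re.findall above depend on; a small illustration with a made-up file name:

import re

name = "2018_012_Contracts_Full_20180515.zip"   # hypothetical file name
name_data = re.findall(r"(.*)_(.*)_(.*)_Full_(.*)\.zip", name)[0]
# name_data == ("2018", "012", "Contracts", "20180515")

updated_date = "-".join([name_data[3][:4], name_data[3][4:6], name_data[3][6:]])
# updated_date == "2018-05-15"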
Example #11
    def validate_request(self, json_request):
        """Analyze request and raise any formatting errors as Exceptions"""
        constraint_type = json_request.get('constraint_type', None)

        # Overriding all other filters if the keyword filter is provided in year-constraint download
        if constraint_type == 'year' and 'elasticsearch_keyword' in json_request.get('filters', {}):
            json_request['filters'] = {
                'elasticsearch_keyword': json_request['filters']['elasticsearch_keyword'],
                'award_type_codes': list(award_type_mapping.keys())
            }
            json_request['limit'] = settings.MAX_DOWNLOAD_LIMIT
            return json_request

        # Validate required parameters
        for required_param in ['award_levels', 'filters']:
            if required_param not in json_request:
                raise InvalidParameterException(
                    'Missing one or more required query parameters: {}'.format(
                        required_param))

        if not isinstance(json_request['award_levels'], list):
            raise InvalidParameterException(
                'Award levels parameter not provided as a list')
        elif len(json_request['award_levels']) == 0:
            raise InvalidParameterException(
                'At least one award level is required.')
        for award_level in json_request['award_levels']:
            if award_level not in VALUE_MAPPINGS:
                raise InvalidParameterException(
                    'Invalid award_level: {}'.format(award_level))

        if not isinstance(json_request['filters'], dict):
            raise InvalidParameterException(
                'Filters parameter not provided as a dict')
        elif len(json_request['filters']) == 0:
            raise InvalidParameterException('At least one filter is required.')

        # Set defaults of non-required parameters
        json_request['columns'] = json_request.get('columns', [])
        json_request['file_format'] = json_request.get('file_format', 'csv')

        # Validate shared filter types and assign defaults
        filters = json_request['filters']
        check_types_and_assign_defaults(filters, SHARED_FILTER_DEFAULTS)

        # Validate award type types
        if not filters.get('award_type_codes',
                           None) or len(filters['award_type_codes']) < 1:
            filters['award_type_codes'] = list(award_type_mapping.keys())
        for award_type_code in filters['award_type_codes']:
            if award_type_code not in award_type_mapping:
                raise InvalidParameterException(
                    'Invalid award_type: {}'.format(award_type_code))

        # Validate time periods
        total_range_count = validate_time_periods(filters)

        if constraint_type == 'row_count':
            # Validate limit exists and is below MAX_DOWNLOAD_LIMIT
            json_request['limit'] = parse_limit(json_request)

            # Validate row_count-constrained filter types and assign defaults
            check_types_and_assign_defaults(filters,
                                            ROW_CONSTRAINT_FILTER_DEFAULTS)
        elif constraint_type == 'year':
            # Validate combined total dates within one year (allow for leap years)
            if total_range_count > 366:
                raise InvalidParameterException(
                    'Invalid Parameter: time_period total days must be within a year'
                )

            # Validate year-constrained filter types and assign defaults
            check_types_and_assign_defaults(filters,
                                            YEAR_CONSTRAINT_FILTER_DEFAULTS)
        else:
            raise InvalidParameterException(
                'Invalid parameter: constraint_type must be "row_count" or "year"'
            )

        return json_request
Example #12
    def post(self, request):
        """Return list of agencies if no POST data is provided.
        Otherwise, returns sub_agencies/federal_accounts associated with the agency provided"""
        response_data = {
            'agencies': [],
            'sub_agencies': [],
            'federal_accounts': []
        }
        if not self.sub_agencies_map:
            # populate the sub_agencies dictionary
            self.pull_modified_agencies_cgacs_subtiers()
        used_cgacs = set(self.sub_agencies_map.values())

        agency_id = None
        post_data = request.data
        if post_data:
            if 'agency' not in post_data:
                raise InvalidParameterException(
                    'Missing one or more required query parameters: agency')
            agency_id = post_data['agency']

        # Get all the top tier agencies
        toptier_agencies = list(
            ToptierAgency.objects.filter(cgac_code__in=used_cgacs).values(
                'name', 'toptier_agency_id', 'cgac_code'))

        if not agency_id:
            # Return all the agencies if no agency id provided
            cfo_agencies = sorted(
                list(
                    filter(lambda agency: agency['cgac_code'] in CFO_CGACS,
                           toptier_agencies)),
                key=lambda agency: CFO_CGACS.index(agency['cgac_code']))
            other_agencies = sorted([
                agency
                for agency in toptier_agencies if agency not in cfo_agencies
            ],
                                    key=lambda agency: agency['name'])
            response_data['agencies'] = {
                'cfo_agencies': cfo_agencies,
                'other_agencies': other_agencies
            }
        else:
            # Get the top tier agency object based on the agency id provided
            top_tier_agency = list(
                filter(
                    lambda toptier: toptier['toptier_agency_id'] == agency_id,
                    toptier_agencies))
            if not top_tier_agency:
                raise InvalidParameterException('Agency ID not found')
            top_tier_agency = top_tier_agency[0]
            # Get the sub agencies and federal accounts associated with that top tier agency
            # The distinct() on subtier_agency_name was removed because subtiers with multiple codes
            # that aren't in the modified agency list are filtered out below
            response_data['sub_agencies'] = Agency.objects.filter(toptier_agency_id=agency_id)\
                .values(subtier_agency_name=F('subtier_agency__name'),
                        subtier_agency_code=F('subtier_agency__subtier_code'))\
                .order_by('subtier_agency_name')
            # Tried converting this to queryset filtering, but ran into issues verifying the correct
            # subtier_agency by cross-checking the cgac_code; see the last two lines of the list
            # comprehension below
            response_data['sub_agencies'] = [
                subagency for subagency in response_data['sub_agencies']
                if subagency['subtier_agency_code'] in self.sub_agencies_map
                and self.sub_agencies_map[subagency['subtier_agency_code']] ==
                top_tier_agency['cgac_code']
            ]
            for subagency in response_data['sub_agencies']:
                del subagency['subtier_agency_code']

            response_data['federal_accounts'] = FederalAccount.objects\
                .filter(agency_identifier=top_tier_agency['cgac_code'])\
                .values(federal_account_name=F('account_title'), federal_account_id=F('id'))\
                .order_by('federal_account_name')
        return Response(response_data)
Example #13
    def process_filters(self, request_data):
        """Filter function to update Bulk Download parameters to shared parameters"""

        # Validate filter parameter
        filters = request_data.get('filters', None)
        if not filters:
            raise InvalidParameterException(
                'Missing one or more required query parameters: filters')

        # Validate keyword search first, remove all other filters
        if 'keyword' in filters:
            request_data['filters'] = {
                'elasticsearch_keyword': filters['keyword']
            }
            return

        # Validate other parameters previously required by the Bulk Download endpoint
        for required_param in [
                'award_types', 'agency', 'date_type', 'date_range'
        ]:
            if required_param not in filters:
                raise InvalidParameterException(
                    'Missing one or more required query parameters: {}'.format(
                        required_param))

        # Replacing award_types with award_type_codes
        filters['award_type_codes'] = []
        try:
            for award_type_code in filters['award_types']:
                if award_type_code in all_award_types_mappings:
                    filters['award_type_codes'].extend(
                        all_award_types_mappings[award_type_code])
                else:
                    raise InvalidParameterException(
                        'Invalid award_type: {}'.format(award_type_code))
            del filters['award_types']
        except TypeError:
            raise InvalidParameterException(
                'award_types parameter not provided as a list')

        # Replacing date_range with time_period
        date_range_copied = filters['date_range'].copy()
        date_range_copied['date_type'] = filters['date_type']
        filters['time_period'] = [date_range_copied]
        del filters['date_range']
        del filters['date_type']

        # Replacing agency with agencies
        if filters['agency'] != 'all':
            toptier_name = (ToptierAgency.objects.filter(
                toptier_agency_id=filters['agency']).values('name'))
            if not toptier_name:
                raise InvalidParameterException(
                    'Toptier ID not found: {}'.format(filters['agency']))
            toptier_name = toptier_name[0]['name']
            filters['agencies'] = [{
                'type': 'awarding',
                'tier': 'toptier',
                'name': toptier_name
            }]
            if 'sub_agency' in filters:
                if filters['sub_agency']:
                    filters['agencies'].append({
                        'type': 'awarding',
                        'tier': 'subtier',
                        'name': filters['sub_agency']
                    })
                del filters['sub_agency']
        del filters['agency']

        request_data['filters'] = filters
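
A sketch of the legacy Bulk Download payload this method rewrites into the shared filter shape; the award_types values must be keys of all_award_types_mappings, and the other values below are illustrative:

request_data = {
    "filters": {
        "award_types": ["contracts"],    # illustrative; must be a key of all_award_types_mappings
        "agency": "all",                 # or a toptier_agency_id
        "date_type": "action_date",      # illustrative date_type value
        "date_range": {"start_date": "2019-01-01", "end_date": "2019-12-31"},
    }
}
# Inside the view: self.process_filters(request_data)
# Afterwards request_data["filters"] carries "award_type_codes" and "time_period"
# (plus "agencies" when a specific agency was given), with the legacy keys removed.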
def account_download_filter(account_type,
                            download_table,
                            filters,
                            account_level="treasury_account"):
    query_filters = {}
    tas_id = "treasury_account_identifier" if account_type == "account_balances" else "treasury_account"

    # Filter by Agency, if provided
    if filters.get("agency", False) and filters["agency"] != "all":
        agency = ToptierAgency.objects.filter(
            toptier_agency_id=filters["agency"]).first()
        if not agency:
            raise InvalidParameterException(
                "Agency with that ID does not exist")
        query_filters[
            f"{tas_id}__funding_toptier_agency_id"] = agency.toptier_agency_id

    # Filter by Federal Account, if provided
    if filters.get("federal_account",
                   False) and filters["federal_account"] != "all":
        federal_account_obj = FederalAccount.objects.filter(
            id=filters["federal_account"]).first()
        if federal_account_obj:
            query_filters[f"{tas_id}__federal_account__id"] = filters[
                "federal_account"]
        else:
            raise InvalidParameterException(
                "Federal Account with that ID does not exist")

    # Filter by Budget Function, if provided
    if filters.get("budget_function",
                   False) and filters["budget_function"] != "all":
        query_filters[f"{tas_id}__budget_function_code"] = filters[
            "budget_function"]

    # Filter by Budget SubFunction, if provided
    if filters.get("budget_subfunction",
                   False) and filters["budget_subfunction"] != "all":
        query_filters[f"{tas_id}__budget_subfunction_code"] = filters[
            "budget_subfunction"]

    # Filter by Fiscal Year and Quarter
    reporting_period_start, reporting_period_end, start_date, end_date = retrieve_fyq_filters(
        account_type, account_level, filters)
    query_filters[reporting_period_start] = start_date
    query_filters[reporting_period_end] = end_date

    # Create the base queryset
    queryset = download_table.objects

    if account_type in ["account_balances", "object_class_program_activity"]:
        # only include the latest TASs, not all of them
        unique_id_mapping = {
            "account_balances":
            "appropriation_account_balances_id",
            "object_class_program_activity":
            "financial_accounts_by_program_activity_object_class_id",
        }

        unique_columns_mapping = {
            "account_balances":
            ["treasury_account_identifier__tas_rendering_label"],
            "object_class_program_activity": [
                "treasury_account__tas_rendering_label",
                "program_activity__program_activity_code",
                "object_class__object_class",
                "object_class__direct_reimbursable",
            ],
        }

        # DEV-5180 check if cares act features are enabled in production yet
        if settings.ENABLE_CARES_ACT_FEATURES is True:
            unique_columns_mapping["object_class_program_activity"].append(
                "disaster_emergency_fund__code")
            unique_columns_mapping["object_class_program_activity"].append(
                "disaster_emergency_fund__title")

        distinct_cols = unique_columns_mapping[account_type]
        order_by_cols = distinct_cols + ["-reporting_period_start", "-pk"]
        latest_ids_q = (download_table.objects.filter(
            **query_filters).distinct(*distinct_cols).order_by(
                *order_by_cols).values(unique_id_mapping[account_type]))
        if latest_ids_q.exists():
            query_filters[
                f"{unique_id_mapping[account_type]}__in"] = latest_ids_q

    # Make derivations based on the account level
    if account_level == "treasury_account":
        queryset = generate_treasury_account_query(queryset, account_type,
                                                   tas_id)
    elif account_level == "federal_account":
        queryset = generate_federal_account_query(queryset, account_type,
                                                  tas_id)
    else:
        raise InvalidParameterException(
            'Invalid Parameter: account_level must be either "federal_account" or '
            '"treasury_account"')

    # Apply filter and return
    return queryset.filter(**query_filters)
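
A hedged illustration of the filter kwargs the function assembles for an "object_class_program_activity" download, where the TAS join prefix is "treasury_account"; the reporting-period keys come from retrieve_fyq_filters, which is not shown above, so their exact names are an assumption:

query_filters = {
    "treasury_account__funding_toptier_agency_id": 123,   # from the ToptierAgency lookup (illustrative id)
    "treasury_account__budget_function_code": "050",      # budget_function selection (illustrative code)
    # plus the period bounds returned by retrieve_fyq_filters, e.g. (assumed key names):
    # "reporting_period_start": start_date,
    # "reporting_period_end": end_date,
}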
Example #15
    def post(self, request):
        json_request = request.data
        self.scope = json_request.get("scope")
        self.filters = json_request.get("filters", {})
        self.geo_layer = json_request.get("geo_layer")
        self.geo_layer_filters = json_request.get("geo_layer_filters")

        fields_list = []  # fields to include in the aggregate query

        loc_dict = {
            'state': 'state_code',
            'county': 'county_code',
            'district': 'congressional_code'
        }

        model_dict = {
            'place_of_performance': 'pop',
            'recipient_location': 'recipient_location'
        }

        # Build the query based on the scope fields and geo_layers.
        # If the fields are not in the reference objects above, the request is invalid.

        scope_field_name = model_dict.get(self.scope)
        loc_field_name = loc_dict.get(self.geo_layer)

        if scope_field_name is None:
            raise InvalidParameterException("Invalid request parameters: scope")

        if loc_field_name is None:
            raise InvalidParameterException("Invalid request parameters: geo_layer")

        loc_lookup = '{}_{}'.format(scope_field_name, loc_field_name)

        self.queryset, self.matview_model = spending_by_geography(self.filters)

        if self.geo_layer == 'state':
            # State will have one field (state_code) containing letters A-Z
            kwargs = {
                '{}_country_code'.format(scope_field_name): 'USA',
                'federal_action_obligation__isnull': False
            }

            # Only the state scope adds its own state code
            # State codes are consistent in the db, e.g. AL, AK
            fields_list.append(loc_lookup)

            state_response = {
                'scope': self.scope,
                'geo_layer': self.geo_layer,
                'results': self.state_results(kwargs, fields_list, loc_lookup)
            }

            return Response(state_response)

        else:
            # County and district scope will need to select multiple fields
            # State code is needed for county/district aggregation
            state_lookup = '{}_{}'.format(scope_field_name, loc_dict['state'])
            fields_list.append(state_lookup)

            # A regex is applied to county/district codes to drop entries containing letters,
            # since those can't be surfaced on the map
            kwargs = {'federal_action_obligation__isnull': False}

            if self.geo_layer == 'county':
                # County name is added to the aggregation since it is consistent in the db
                county_name = '{}_{}'.format(scope_field_name, 'county_name')
                fields_list.append(county_name)
                self.county_district_queryset(
                    kwargs,
                    fields_list,
                    loc_lookup,
                    state_lookup,
                    scope_field_name
                )

                county_response = {
                    'scope': self.scope,
                    'geo_layer': self.geo_layer,
                    'results': self.county_results(state_lookup, county_name)
                }

                return Response(county_response)
            else:
                self.county_district_queryset(
                    kwargs,
                    fields_list,
                    loc_lookup,
                    state_lookup,
                    scope_field_name
                )

                district_response = {
                    'scope': self.scope,
                    'geo_layer': self.geo_layer,
                    'results': self.district_results(state_lookup)
                }

                return Response(district_response)
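The handler above joins the scope prefix and geo_layer suffix into the column name used for aggregation (for example pop_county_code). A standalone sketch of that lookup, mirroring loc_dict and model_dict but with a plain ValueError in place of InvalidParameterException:

# Standalone sketch of the lookup logic in the view above.
LOC_DICT = {'state': 'state_code', 'county': 'county_code', 'district': 'congressional_code'}
MODEL_DICT = {'place_of_performance': 'pop', 'recipient_location': 'recipient_location'}

def build_loc_lookup(scope, geo_layer):
    # Returns e.g. 'pop_county_code' for scope='place_of_performance', geo_layer='county'
    scope_field_name = MODEL_DICT.get(scope)
    loc_field_name = LOC_DICT.get(geo_layer)
    if scope_field_name is None:
        raise ValueError('Invalid request parameters: scope')
    if loc_field_name is None:
        raise ValueError('Invalid request parameters: geo_layer')
    return '{}_{}'.format(scope_field_name, loc_field_name)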
Example #16
0
def _validate_file_format(json_request: dict) -> None:
    val = json_request["file_format"]
    if val not in FILE_FORMATS:
        msg = f"'{val}' is not an acceptable value for 'file_format'. Valid options: {tuple(FILE_FORMATS.keys())}"
        raise InvalidParameterException(msg)
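A minimal usage sketch of the validator above; this FILE_FORMATS stand-in is an assumption for illustration only, not the module's real mapping:

# Hypothetical FILE_FORMATS stand-in; only the keys matter to the validator.
FILE_FORMATS = {"csv": {"delimiter": ","}, "tsv": {"delimiter": "\t"}, "pstxt": {"delimiter": "|"}}

_validate_file_format({"file_format": "csv"})   # passes silently
_validate_file_format({"file_format": "xlsx"})  # raises InvalidParameterException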
Example #17
0
    def post(self, request):
        """Return all budget function/subfunction titles matching the provided search text"""
        json_request = request.data
        group = json_request.get('group', None)
        filters = json_request.get('filters', None)

        if group is None:
            raise InvalidParameterException('Missing one or more required request parameters: group')
        if filters is None:
            raise InvalidParameterException('Missing one or more required request parameters: filters')
        potential_groups = ['quarter', 'fiscal_year', 'month', 'fy', 'q', 'm']
        if group not in potential_groups:
            raise InvalidParameterException('group does not have a valid value')

        queryset = spending_over_time(filters)
        filter_types = filters['award_type_codes'] if 'award_type_codes' in filters else award_type_mapping

        # define what values are needed in the sql query
        queryset = queryset.values('action_date', 'federal_action_obligation', 'original_loan_subsidy_cost')

        # build response
        response = {'group': group, 'results': []}
        nested_order = ''

        group_results = OrderedDict()  # maps stringified time_period dicts, e.g. "{'fiscal_year': '2017', 'quarter': '3'}", to aggregated amounts

        if group == 'fy' or group == 'fiscal_year':

            fy_set = sum_transaction_amount(queryset.values('fiscal_year'), filter_types=filter_types)

            for trans in fy_set:
                key = {'fiscal_year': str(trans['fiscal_year'])}
                key = str(key)
                group_results[key] = trans['transaction_amount']

        elif group == 'm' or group == 'month':

            month_set = queryset.annotate(month=ExtractMonth('action_date')) \
                .values('fiscal_year', 'month')
            month_set = sum_transaction_amount(month_set, filter_types=filter_types)

            for trans in month_set:
                # Convert the calendar month to a fiscal month (the year is arbitrary; only the month matters)
                fiscal_month = generate_fiscal_month(date(year=2017, day=1, month=trans['month']))

                key = {'fiscal_year': str(trans['fiscal_year']), 'month': str(fiscal_month)}
                key = str(key)
                group_results[key] = trans['transaction_amount']
            nested_order = 'month'
        else:  # quarterly, take months and add them up

            month_set = queryset.annotate(month=ExtractMonth('action_date')) \
                .values('fiscal_year', 'month')
            month_set = sum_transaction_amount(month_set, filter_types=filter_types)

            for trans in month_set:
                # Convert the calendar month to a fiscal quarter (the year is arbitrary; only the month matters)
                quarter = FiscalDate(2017, trans['month'], 1).quarter

                key = {'fiscal_year': str(trans['fiscal_year']), 'quarter': str(quarter)}
                key = str(key)

                # If the {fiscal_year, quarter} key already exists, aggregate into it
                if group_results.get(key) is None:
                    group_results[key] = trans['transaction_amount']
                elif trans['transaction_amount']:
                    group_results[key] = group_results[key] + trans['transaction_amount']
            nested_order = 'quarter'

        # convert result into expected format, sort by key to meet front-end specs
        results = []
        # Expected results structure
        # [{
        #     'time_period': {'fy': '2017', 'quarter': '3'},
        #     'aggregated_amount': '200000000'
        # }]
        sorted_group_results = sorted(
            group_results.items(),
            key=lambda k: (ast.literal_eval(k[0])['fiscal_year'], int(ast.literal_eval(k[0])[nested_order]))
            if nested_order
            else (ast.literal_eval(k[0])['fiscal_year']))

        for key, value in sorted_group_results:
            key_dict = ast.literal_eval(key)
            result = {'time_period': key_dict, 'aggregated_amount': float(value) if value else float(0)}
            results.append(result)
        response['results'] = results

        return Response(response)
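The quarterly branch above folds calendar months into fiscal quarters before summing. A standalone sketch of that fold with made-up monthly rows, using fiscalyear.FiscalDate the same way the view does:

from collections import OrderedDict

from fiscalyear import FiscalDate

# Made-up rows standing in for month_set above (calendar months, not fiscal months).
month_rows = [
    {'fiscal_year': 2017, 'month': 1, 'transaction_amount': 100.0},
    {'fiscal_year': 2017, 'month': 2, 'transaction_amount': 50.0},
    {'fiscal_year': 2017, 'month': 4, 'transaction_amount': 25.0},
]

group_results = OrderedDict()
for row in month_rows:
    quarter = FiscalDate(2017, row['month'], 1).quarter  # only the month affects the quarter
    key = str({'fiscal_year': str(row['fiscal_year']), 'quarter': str(quarter)})
    group_results[key] = group_results.get(key, 0) + (row['transaction_amount'] or 0)

# January and February both fall in fiscal Q2, so their amounts are summed under one key.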
Example #18
0
def generate_csvs(download_job, sqs_message=None):
    """Derive the relevant file location and write CSVs to it"""
    start_time = time.time()

    # Parse data from download_job
    json_request = json.loads(download_job.json_request)
    columns = json_request.get('columns', None)
    limit = json_request.get('limit', None)

    file_name = start_download(download_job)
    try:
        # Create temporary files and working directory
        file_path = settings.CSV_LOCAL_PATH + file_name
        working_dir = os.path.splitext(file_path)[0]
        if not os.path.exists(working_dir):
            os.mkdir(working_dir)

        write_to_log(message='Generating {}'.format(file_name),
                     download_job=download_job)

        # Generate sources from the JSON request object
        sources = get_csv_sources(json_request)
        for source in sources:
            # Parse and write data to the file
            download_job.number_of_columns = max(
                download_job.number_of_columns, len(source.columns(columns)))
            parse_source(source, columns, download_job, working_dir,
                         start_time, sqs_message, file_path, limit)
        download_job.file_size = os.stat(file_path).st_size
    except Exception as e:
        # Set error message; job_status_id will be set in generate_zip.handle()
        download_job.error_message = 'An exception was raised while attempting to write the file:\n{}'.format(
            str(e))
        download_job.save()
        if isinstance(e, InvalidParameterException):
            raise InvalidParameterException(e)
        else:
            raise Exception(download_job.error_message) from e
    finally:
        # Remove working directory
        if os.path.exists(working_dir):
            shutil.rmtree(working_dir)

    try:
        # push file to S3 bucket, if not local
        if not settings.IS_LOCAL:
            bucket = settings.BULK_DOWNLOAD_S3_BUCKET_NAME
            region = settings.USASPENDING_AWS_REGION
            start_uploading = time.time()
            multipart_upload(bucket, region, file_path, os.path.basename(file_path))
            write_to_log(
                message='Uploading took {} seconds'.format(time.time() - start_uploading),
                download_job=download_job)
    except Exception as e:
        # Set error message; job_status_id will be set in generate_zip.handle()
        download_job.error_message = 'An exception was raised while attempting to upload the file:\n{}'.format(
            str(e))
        download_job.save()
        if isinstance(e, InvalidParameterException):
            raise InvalidParameterException(e)
        else:
            raise Exception(download_job.error_message) from e
    finally:
        # Remove generated file
        if not settings.IS_LOCAL and os.path.exists(file_path):
            os.remove(file_path)

    return finish_download(download_job)
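When not running locally, the finished file is pushed to S3 and the local copy is removed. A rough sketch of that upload-then-cleanup step using plain boto3; this is an assumption for illustration, not the project's multipart_upload helper:

import os
import time

import boto3  # assumed available; the real helper's implementation may differ

def upload_and_cleanup(file_path, bucket, region):
    """Illustrative sketch of the push-to-S3-then-delete flow above."""
    start_uploading = time.time()
    s3 = boto3.client('s3', region_name=region)
    # upload_file transparently switches to multipart transfers for large files.
    s3.upload_file(file_path, bucket, os.path.basename(file_path))
    elapsed = time.time() - start_uploading
    # Mirror the finally block above: drop the local copy once it is in S3.
    if os.path.exists(file_path):
        os.remove(file_path)
    return elapsed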