def county_district_queryset(self, kwargs, fields_list, loc_lookup, state_lookup, scope_field_name):
        # Filtering queryset to specific county/districts if requested
        # Since geo_layer_filters comes as concat of state fips and county/district codes
        # need to split for the geocode_filter
        if self.geo_layer_filters:
            geo_layers_list = [
                {'state': fips_to_code.get(x[:2]), self.geo_layer: x[2:], 'country': 'USA'}
                for x in self.geo_layer_filters
            ]
            self.queryset = self.queryset.filter(geocode_filter_locations(scope_field_name, geo_layers_list, True))
        else:
            # Adding null, USA, not number filters for specific partial index when not using geocode_filter
            kwargs['{}__{}'.format(loc_lookup, 'isnull')] = False
            kwargs['{}__{}'.format(state_lookup, 'isnull')] = False
            kwargs['{}_country_code'.format(scope_field_name)] = 'USA'
            kwargs['{}__{}'.format(loc_lookup, 'iregex')] = r'^[0-9]*(\.\d+)?$'

        # Turn county/district codes into float since inconsistent in database
        # Codes in location table ex: '01', '1', '1.0'
        # Cast will group codes as a float and will combine inconsistent codes
        self.geo_queryset = self.queryset.filter(**kwargs) \
            .values(*fields_list) \
            .annotate(code_as_float=Cast(loc_lookup, FloatField()))

        if self.subawards:
            self.geo_queryset = self.geo_queryset.annotate(transaction_amount=Sum('amount'))
        else:
            self.geo_queryset = self.geo_queryset \
                .annotate(transaction_amount=Sum('generated_pragmatic_obligation')) \
                .values('transaction_amount', 'code_as_float', *fields_list)

        return self.geo_queryset
    def county_district_queryset(self, kwargs, fields_list, loc_lookup, state_lookup, scope_field_name):
        # Filtering queryset to specific county/districts if requested
        # Since geo_layer_filters comes as concat of state fips and county/district codes
        # need to split for the geocode_filter
        if self.geo_layer_filters:
            geo_layers_list = [
                {"state": fips_to_code.get(x[:2]), self.geo_layer: x[2:], "country": "USA"}
                for x in self.geo_layer_filters
            ]
            self.queryset = self.queryset.filter(geocode_filter_locations(scope_field_name, geo_layers_list))
        else:
            # Adding null, USA, not number filters for specific partial index when not using geocode_filter
            kwargs["{}__{}".format(loc_lookup, "isnull")] = False
            kwargs["{}__{}".format(state_lookup, "isnull")] = False
            kwargs["{}_country_code".format(scope_field_name)] = "USA"
            kwargs["{}__{}".format(loc_lookup, "iregex")] = r"^[0-9]*(\.\d+)?$"

        # Turn county/district codes into float since inconsistent in database
        # Codes in location table ex: '01', '1', '1.0'
        # Cast will group codes as a float and will combine inconsistent codes
        self.geo_queryset = (
            self.queryset.filter(**kwargs).values(*fields_list).annotate(code_as_float=Cast(loc_lookup, FloatField()))
        )

        if self.subawards:
            self.geo_queryset = self.geo_queryset.annotate(transaction_amount=Sum("amount"))
        else:
            self.geo_queryset = self.geo_queryset.annotate(
                transaction_amount=Sum("generated_pragmatic_obligation")
            ).values("transaction_amount", "code_as_float", *fields_list)

        return self.geo_queryset
Esempio n. 3
0
    def county_district_queryset(self, kwargs, fields_list, loc_lookup, state_lookup, scope_field_name):
        # Filtering queryset to specific county/districts if requested
        # Since geo_layer_filters comes as concat of state fips and county/district codes
        # need to split for the geocode_filter
        if self.geo_layer_filters:
            self.queryset &= geocode_filter_locations(scope_field_name, [
                {'state': fips_to_code.get(x[:2]), self.geo_layer: x[2:], 'country': 'USA'}
                for x in self.geo_layer_filters
            ], self.model_name, not self.subawards)
        else:
            # Adding null, USA, not number filters for specific partial index when not using geocode_filter
            kwargs['{}__{}'.format(loc_lookup, 'isnull')] = False
            kwargs['{}__{}'.format(state_lookup, 'isnull')] = False
            kwargs['{}_{}country_code'.format(scope_field_name, '_location_' if self.subawards else '')] = 'USA'
            kwargs['{}__{}'.format(loc_lookup, 'iregex')] = r'^[0-9]*(\.\d+)?$'

        # Turn county/district codes into float since inconsistent in database
        # Codes in location table ex: '01', '1', '1.0'
        # Cast will group codes as a float and will combine inconsistent codes
        self.geo_queryset = self.queryset.filter(**kwargs) \
            .values(*fields_list) \
            .annotate(code_as_float=Cast(loc_lookup, FloatField()))
        filter_types = self.filters['award_type_codes'] if 'award_type_codes' in self.filters else award_type_mapping
        self.geo_queryset = sum_transaction_amount(self.geo_queryset, filter_types=filter_types) if not self.subawards \
            else self.geo_queryset.annotate(transaction_amount=Sum('amount'))

        return self.geo_queryset
    def county_district_queryset(self, kwargs, fields_list, loc_lookup, state_lookup, scope_field_name):
        # Filtering queryset to specific county/districts if requested
        # Since geo_layer_filters comes as concat of state fips and county/district codes
        # need to split for the geocode_filter
        if self.geo_layer_filters:
            geo_layers_list = [
                {'state': fips_to_code.get(x[:2]), self.geo_layer: x[2:], 'country': 'USA'}
                for x in self.geo_layer_filters
            ]
            self.queryset = self.queryset.filter(geocode_filter_locations(scope_field_name, geo_layers_list, True))
        else:
            # Adding null, USA, not number filters for specific partial index when not using geocode_filter
            kwargs['{}__{}'.format(loc_lookup, 'isnull')] = False
            kwargs['{}__{}'.format(state_lookup, 'isnull')] = False
            kwargs['{}_country_code'.format(scope_field_name)] = 'USA'
            kwargs['{}__{}'.format(loc_lookup, 'iregex')] = r'^[0-9]*(\.\d+)?$'

        # Turn county/district codes into float since inconsistent in database
        # Codes in location table ex: '01', '1', '1.0'
        # Cast will group codes as a float and will combine inconsistent codes
        self.geo_queryset = self.queryset.filter(**kwargs) \
            .values(*fields_list) \
            .annotate(code_as_float=Cast(loc_lookup, FloatField()))

        if self.subawards:
            self.geo_queryset = self.geo_queryset.annotate(transaction_amount=Sum('amount'))
        else:
            self.geo_queryset = self.geo_queryset \
                .annotate(transaction_amount=Sum('generated_pragmatic_obligation')) \
                .values('transaction_amount', 'code_as_float', *fields_list)

        return self.geo_queryset
def test_geocode_filter_locations(award_data_fixture, refresh_matviews):

    to = UniversalAwardView.objects

    values = [
        {
            "city": "McCool Junction",
            "state": "TX",
            "country": "USA"
        },
        {
            "city": "Burbank",
            "state": "CA",
            "country": "USA"
        },
    ]

    assert to.filter(geocode_filter_locations("nothing", [],
                                              True)).count() == 5
    assert to.filter(geocode_filter_locations("pop", values,
                                              True)).count() == 1
    assert to.filter(
        geocode_filter_locations("recipient_location", values,
                                 True)).count() == 1

    values = [
        {
            "city": "Houston",
            "state": "TX",
            "country": "USA"
        },
        {
            "city": "McCool Junction",
            "state": "TX",
            "country": "USA"
        },
    ]

    assert to.filter(geocode_filter_locations("pop", values,
                                              True)).count() == 1
    assert to.filter(
        geocode_filter_locations("recipient_location", values,
                                 True)).count() == 0
def test_geocode_filter_locations(award_data_fixture):

    to = ContractAwardSearchMatview.objects

    values = [
        {
            "city": "McCool Junction",
            "state": "TX",
            "country": "USA"
        },
        {
            "city": "Burbank",
            "state": "CA",
            "country": "USA"
        },
    ]

    assert to.filter(geocode_filter_locations("nothing", [])).count() == 5
    assert to.filter(geocode_filter_locations("pop", values)).count() == 1
    assert to.filter(geocode_filter_locations("recipient_location",
                                              values)).count() == 1

    values = [
        {
            "city": "Houston",
            "state": "TX",
            "country": "USA"
        },
        {
            "city": "McCool Junction",
            "state": "TX",
            "country": "USA"
        },
    ]

    assert to.filter(geocode_filter_locations("pop", values)).count() == 1
    assert to.filter(geocode_filter_locations("recipient_location",
                                              values)).count() == 0
def matview_search_filter(filters, model, for_downloads=False):
    queryset = model.objects.all()

    recipient_scope_q = Q(recipient_location_country_code="USA") | Q(
        recipient_location_country_name="UNITED STATES")
    pop_scope_q = Q(pop_country_code="USA") | Q(
        pop_country_name="UNITED STATES")

    faba_flag = False
    faba_queryset = FinancialAccountsByAwards.objects.filter(
        award__isnull=False)

    for key, value in filters.items():
        if value is None:
            raise InvalidParameterException('Invalid filter: ' + key +
                                            ' has null as its value.')

        key_list = [
            'keywords',
            'elasticsearch_keyword',
            'time_period',
            'award_type_codes',
            'agencies',
            'legal_entities',
            'recipient_id',
            'recipient_search_text',
            'recipient_scope',
            'recipient_locations',
            'recipient_type_names',
            'place_of_performance_scope',
            'place_of_performance_locations',
            'award_amounts',
            'award_ids',
            'program_numbers',
            'naics_codes',
            'psc_codes',
            'contract_pricing_type_codes',
            'set_aside_type_codes',
            'extent_competed_type_codes',
            # next 3 keys used by federal account page
            'federal_account_ids',
            'object_class',
            'program_activity'
        ]

        if key not in key_list:
            raise InvalidParameterException('Invalid filter: ' + key +
                                            ' does not exist.')

        if key == "keywords":

            def keyword_parse(keyword):
                # keyword_ts_vector & award_ts_vector are Postgres TS_vectors.
                # keyword_ts_vector = recipient_name + naics_code + naics_description
                #     + psc_description + awards_description
                # award_ts_vector = piid + fain + uri
                filter_obj = Q(keyword_ts_vector=keyword) | Q(
                    award_ts_vector=keyword)
                if keyword.isnumeric():
                    filter_obj |= Q(naics_code__contains=keyword)
                if len(keyword) == 4 and PSC.objects.all().filter(
                        code__iexact=keyword).exists():
                    filter_obj |= Q(product_or_service_code__iexact=keyword)

                return filter_obj

            filter_obj = Q()
            for keyword in value:
                filter_obj |= keyword_parse(keyword)
            potential_duns = list(
                filter((lambda x: len(x) > 7 and len(x) < 10), value))
            if len(potential_duns) > 0:
                filter_obj |= Q(recipient_unique_id__in=potential_duns) | \
                    Q(parent_recipient_unique_id__in=potential_duns)

            queryset = queryset.filter(filter_obj)

        elif key == "elasticsearch_keyword":
            keyword = " ".join(value) if isinstance(value, list) else value
            transaction_ids = elasticsearch_helper.get_download_ids(
                keyword=keyword, field='transaction_id')
            # flatten IDs
            transaction_ids = list(
                itertools.chain.from_iterable(transaction_ids))
            logger.info('Found {} transactions based on keyword: {}'.format(
                len(transaction_ids), keyword))
            transaction_ids = [
                str(transaction_id) for transaction_id in transaction_ids
            ]
            queryset = queryset.extra(where=[
                '"transaction_normalized"."id" = ANY(\'{{{}}}\'::int[])'.
                format(','.join(transaction_ids))
            ])

        elif key == "time_period":
            min_date = API_SEARCH_MIN_DATE
            if for_downloads:
                min_date = API_MIN_DATE
            queryset &= combine_date_range_queryset(value, model, min_date,
                                                    API_MAX_DATE)

        elif key == "award_type_codes":
            queryset = queryset.filter(type__in=value)

        elif key == "agencies":
            # TODO: Make function to match agencies in award filter throwing dupe error
            funding_toptier = Q()
            funding_subtier = Q()
            awarding_toptier = Q()
            awarding_subtier = Q()
            for v in value:
                type = v["type"]
                tier = v["tier"]
                name = v["name"]
                if type == "funding":
                    if tier == "toptier":
                        funding_toptier |= Q(funding_toptier_agency_name=name)
                    elif tier == "subtier":
                        if 'toptier_name' in v:
                            funding_subtier |= (
                                Q(funding_subtier_agency_name=name) &
                                Q(funding_toptier_agency_name=v['toptier_name']
                                  ))
                        else:
                            funding_subtier |= Q(
                                funding_subtier_agency_name=name)

                elif type == "awarding":
                    if tier == "toptier":
                        awarding_toptier |= Q(
                            awarding_toptier_agency_name=name)
                    elif tier == "subtier":
                        if 'toptier_name' in v:
                            awarding_subtier |= (Q(
                                awarding_subtier_agency_name=name
                            ) & Q(
                                awarding_toptier_agency_name=v['toptier_name'])
                                                 )
                        else:
                            awarding_subtier |= Q(
                                awarding_subtier_agency_name=name)

            awarding_queryfilter = Q()
            funding_queryfilter = Q()

            # Since these are Q filters, no DB hits for boolean checks
            if funding_toptier:
                funding_queryfilter |= funding_toptier
            if funding_subtier:
                funding_queryfilter |= funding_subtier
            if awarding_toptier:
                awarding_queryfilter |= awarding_toptier
            if awarding_subtier:
                awarding_queryfilter |= awarding_subtier

            queryset = queryset.filter(funding_queryfilter
                                       & awarding_queryfilter)

        elif key == "legal_entities":
            # This filter key has effectively become obsolete by recipient_search_text
            msg = 'API request included "{}" key. No filtering will occur with provided value "{}"'
            logger.info(msg.format(key, value))
            # in_query = [v for v in value]
            # if len(in_query) != 0:
            #     queryset &= model.objects.filter(recipient_id__in=in_query)

        elif key == "recipient_search_text":
            all_filters_obj = Q()
            for recip in value:
                upper_recipient_string = str(recip).upper()
                # recipient_name_ts_vector is a postgres TS_Vector
                filter_obj = Q(recipient_name_ts_vector=upper_recipient_string)
                if len(upper_recipient_string
                       ) == 9 and upper_recipient_string[:5].isnumeric():
                    filter_obj |= Q(recipient_unique_id=upper_recipient_string)
                all_filters_obj |= filter_obj
            queryset = queryset.filter(all_filters_obj)

        elif key == "recipient_id":
            filter_obj = Q()
            recipient_hash = value[:-2]

            if value.endswith(
                    'P'
            ):  # For parent types, gather all of the children's transactions
                parent_duns_rows = (RecipientProfile.objects.filter(
                    recipient_hash=recipient_hash,
                    recipient_level='P').values('recipient_unique_id'))
                if len(parent_duns_rows) == 1:
                    parent_duns = parent_duns_rows[0]['recipient_unique_id']
                    filter_obj = Q(parent_recipient_unique_id=parent_duns)
                elif len(parent_duns_rows) > 2:
                    # shouldn't occur
                    raise InvalidParameterException(
                        'Non-unique parent record found in RecipientProfile')
            elif value.endswith('C'):
                filter_obj = Q(recipient_hash=recipient_hash,
                               parent_recipient_unique_id__isnull=False)
            else:
                # "R" recipient level
                filter_obj = Q(recipient_hash=recipient_hash,
                               parent_recipient_unique_id__isnull=True)
            queryset = queryset.filter(filter_obj)

        elif key == "recipient_scope":
            if value == "domestic":
                queryset = queryset.filter(recipient_scope_q)
            elif value == "foreign":
                queryset = queryset.exclude(recipient_scope_q)
            else:
                raise InvalidParameterException(
                    'Invalid filter: recipient_scope type is invalid.')

        elif key == "recipient_locations":
            queryset = queryset.filter(
                geocode_filter_locations("recipient_location", value, True))

        elif key == "recipient_type_names":
            if len(value) != 0:
                queryset = queryset.filter(business_categories__overlap=value)

        elif key == "place_of_performance_scope":
            if value == "domestic":
                queryset = queryset.filter(pop_scope_q)
            elif value == "foreign":
                queryset = queryset.exclude(pop_scope_q)
            else:
                raise InvalidParameterException(
                    'Invalid filter: place_of_performance_scope is invalid.')

        elif key == "place_of_performance_locations":
            queryset = queryset.filter(
                geocode_filter_locations("pop", value, True))

        elif key == "award_amounts":
            queryset &= total_obligation_queryset(value, model, filters)

        elif key == "award_ids":
            filter_obj = Q()
            for val in value:
                # award_id_string is a Postgres TS_vector
                # award_id_string = piid + fain + uri
                filter_obj |= Q(award_ts_vector=val)
            queryset = queryset.filter(filter_obj)

        elif key == "program_numbers":
            in_query = [v for v in value]
            if len(in_query) != 0:
                queryset = queryset.filter(cfda_number__in=in_query)

        elif key == "naics_codes":
            in_query = [v for v in value]
            if len(in_query) != 0:
                queryset = queryset.filter(naics_code__in=in_query)

        elif key == "psc_codes":
            in_query = [v for v in value]
            if len(in_query) != 0:
                queryset = queryset.filter(
                    product_or_service_code__in=in_query)

        elif key == "contract_pricing_type_codes":
            in_query = [v for v in value]
            if len(in_query) != 0:
                queryset = queryset.filter(
                    type_of_contract_pricing__in=in_query)

        elif key == "set_aside_type_codes":
            or_queryset = Q()
            for v in value:
                or_queryset |= Q(type_set_aside__exact=v)
            queryset = queryset.filter(or_queryset)

        elif key == "extent_competed_type_codes":
            or_queryset = Q()
            for v in value:
                or_queryset |= Q(extent_competed__exact=v)
            queryset = queryset.filter(or_queryset)

        # Federal Account Filter
        elif key == "federal_account_ids":
            faba_flag = True
            or_queryset = Q()
            for v in value:
                or_queryset |= Q(treasury_account__federal_account_id=v)
            faba_queryset = faba_queryset.filter(or_queryset)

        # Federal Account Filter
        elif key == "object_class":
            faba_flag = True
            result = Q()
            for oc in value:
                subresult = Q()
                for (key, values) in oc.items():
                    subresult &= filter_on(
                        "treasury_account__program_balances__object_class",
                        key, values)
                result |= subresult
            faba_queryset = faba_queryset.filter(result)

        # Federal Account Filter
        elif key == "program_activity":
            faba_flag = True
            or_queryset = Q()
            for v in value:
                or_queryset |= Q(
                    treasury_account__program_balances__program_activity__program_activity_code
                    =v)
            faba_queryset = faba_queryset.filter(or_queryset)

    if faba_flag:
        award_ids = faba_queryset.values('award_id')
        queryset = queryset.filter(award_id__in=award_ids)

    return queryset
def matview_search_filter(filters, model, for_downloads=False):
    queryset = model.objects.all()

    recipient_scope_q = Q(recipient_location_country_code="USA") | Q(recipient_location_country_name="UNITED STATES")
    pop_scope_q = Q(pop_country_code="USA") | Q(pop_country_name="UNITED STATES")

    faba_flag = False
    faba_queryset = FinancialAccountsByAwards.objects.filter(award__isnull=False)

    for key, value in filters.items():
        if value is None:
            raise InvalidParameterException("Invalid filter: " + key + " has null as its value.")

        key_list = [
            "keywords",
            "elasticsearch_keyword",
            "time_period",
            "award_type_codes",
            "prime_and_sub_award_types",
            "agencies",
            "legal_entities",
            "recipient_id",
            "recipient_search_text",
            "recipient_scope",
            "recipient_locations",
            "recipient_type_names",
            "place_of_performance_scope",
            "place_of_performance_locations",
            "award_amounts",
            "award_ids",
            "program_numbers",
            "naics_codes",
            PSCCodes.underscore_name,
            "contract_pricing_type_codes",
            "set_aside_type_codes",
            "extent_competed_type_codes",
            TasCodes.underscore_name,
            TreasuryAccounts.underscore_name,
            # next 3 keys used by federal account page
            "federal_account_ids",
            "object_class",
            "program_activity",
        ]

        if key not in key_list:
            raise InvalidParameterException("Invalid filter: " + key + " does not exist.")

        if key == "keywords":

            def keyword_parse(keyword):
                # keyword_ts_vector & award_ts_vector are Postgres TS_vectors.
                # keyword_ts_vector = recipient_name + naics_code + naics_description
                #     + psc_description + awards_description
                # award_ts_vector = piid + fain + uri
                filter_obj = Q(keyword_ts_vector=keyword) | Q(award_ts_vector=keyword)
                if keyword.isnumeric():
                    filter_obj |= Q(naics_code__contains=keyword)
                if len(keyword) == 4 and PSC.objects.all().filter(code__iexact=keyword).exists():
                    filter_obj |= Q(product_or_service_code__iexact=keyword)

                return filter_obj

            filter_obj = Q()
            for keyword in value:
                filter_obj |= keyword_parse(keyword)
            potential_duns = list(filter((lambda x: len(x) > 7 and len(x) < 10), value))
            if len(potential_duns) > 0:
                filter_obj |= Q(recipient_unique_id__in=potential_duns) | Q(
                    parent_recipient_unique_id__in=potential_duns
                )

            queryset = queryset.filter(filter_obj)

        elif key == "elasticsearch_keyword":
            keyword = " ".join(value) if isinstance(value, list) else value
            transaction_ids = elasticsearch_helper.get_download_ids(keyword=keyword, field="transaction_id")
            # flatten IDs
            transaction_ids = list(itertools.chain.from_iterable(transaction_ids))
            logger.info("Found {} transactions based on keyword: {}".format(len(transaction_ids), keyword))
            transaction_ids = [str(transaction_id) for transaction_id in transaction_ids]
            queryset = queryset.extra(
                where=['"transaction_normalized"."id" = ANY(\'{{{}}}\'::int[])'.format(",".join(transaction_ids))]
            )

        elif key == "time_period":
            min_date = API_SEARCH_MIN_DATE
            if for_downloads:
                min_date = API_MIN_DATE
            queryset &= combine_date_range_queryset(value, model, min_date, API_MAX_DATE)

        elif key == "award_type_codes":
            queryset = queryset.filter(type__in=value)

        elif key == "prime_and_sub_award_types":
            award_types = value.get("prime_awards")
            if award_types:
                queryset = queryset.filter(type__in=award_types)

        elif key == "agencies":
            # TODO: Make function to match agencies in award filter throwing dupe error
            funding_toptier = Q()
            funding_subtier = Q()
            awarding_toptier = Q()
            awarding_subtier = Q()
            for v in value:
                type = v["type"]
                tier = v["tier"]
                name = v["name"]
                if type == "funding":
                    if tier == "toptier":
                        funding_toptier |= Q(funding_toptier_agency_name=name)
                    elif tier == "subtier":
                        if "toptier_name" in v:
                            funding_subtier |= Q(funding_subtier_agency_name=name) & Q(
                                funding_toptier_agency_name=v["toptier_name"]
                            )
                        else:
                            funding_subtier |= Q(funding_subtier_agency_name=name)

                elif type == "awarding":
                    if tier == "toptier":
                        awarding_toptier |= Q(awarding_toptier_agency_name=name)
                    elif tier == "subtier":
                        if "toptier_name" in v:
                            awarding_subtier |= Q(awarding_subtier_agency_name=name) & Q(
                                awarding_toptier_agency_name=v["toptier_name"]
                            )
                        else:
                            awarding_subtier |= Q(awarding_subtier_agency_name=name)

            awarding_queryfilter = Q()
            funding_queryfilter = Q()

            # Since these are Q filters, no DB hits for boolean checks
            if funding_toptier:
                funding_queryfilter |= funding_toptier
            if funding_subtier:
                funding_queryfilter |= funding_subtier
            if awarding_toptier:
                awarding_queryfilter |= awarding_toptier
            if awarding_subtier:
                awarding_queryfilter |= awarding_subtier

            queryset = queryset.filter(funding_queryfilter & awarding_queryfilter)

        elif key == "legal_entities":
            # This filter key has effectively become obsolete by recipient_search_text
            msg = 'API request included "{}" key. No filtering will occur with provided value "{}"'
            logger.info(msg.format(key, value))
            # in_query = [v for v in value]
            # if len(in_query) != 0:
            #     queryset &= model.objects.filter(recipient_id__in=in_query)

        elif key == "recipient_search_text":
            all_filters_obj = Q()
            for recip in value:
                upper_recipient_string = str(recip).upper()
                # recipient_name_ts_vector is a postgres TS_Vector
                filter_obj = Q(recipient_name_ts_vector=upper_recipient_string)
                if len(upper_recipient_string) == 9 and upper_recipient_string[:5].isnumeric():
                    filter_obj |= Q(recipient_unique_id=upper_recipient_string)
                all_filters_obj |= filter_obj
            queryset = queryset.filter(all_filters_obj)

        elif key == "recipient_id":
            filter_obj = Q()
            recipient_hash = value[:-2]

            if value.endswith("P"):  # For parent types, gather all of the children's transactions
                parent_duns_rows = RecipientProfile.objects.filter(
                    recipient_hash=recipient_hash, recipient_level="P"
                ).values("recipient_unique_id")
                if len(parent_duns_rows) == 1:
                    parent_duns = parent_duns_rows[0]["recipient_unique_id"]
                    filter_obj = Q(parent_recipient_unique_id=parent_duns)
                elif len(parent_duns_rows) > 2:
                    # shouldn't occur
                    raise InvalidParameterException("Non-unique parent record found in RecipientProfile")
            elif value.endswith("C"):
                filter_obj = Q(recipient_hash=recipient_hash, parent_recipient_unique_id__isnull=False)
            else:
                # "R" recipient level
                filter_obj = Q(recipient_hash=recipient_hash, parent_recipient_unique_id__isnull=True)
            queryset = queryset.filter(filter_obj)

        elif key == "recipient_scope":
            if value == "domestic":
                queryset = queryset.filter(recipient_scope_q)
            elif value == "foreign":
                queryset = queryset.exclude(recipient_scope_q)
            else:
                raise InvalidParameterException("Invalid filter: recipient_scope type is invalid.")

        elif key == "recipient_locations":
            queryset = queryset.filter(geocode_filter_locations("recipient_location", value))

        elif key == "recipient_type_names":
            if len(value) != 0:
                queryset = queryset.filter(business_categories__overlap=value)

        elif key == "place_of_performance_scope":
            if value == "domestic":
                queryset = queryset.filter(pop_scope_q)
            elif value == "foreign":
                queryset = queryset.exclude(pop_scope_q)
            else:
                raise InvalidParameterException("Invalid filter: place_of_performance_scope is invalid.")

        elif key == "place_of_performance_locations":
            queryset = queryset.filter(geocode_filter_locations("pop", value))

        elif key == "award_amounts":
            queryset &= total_obligation_queryset(value, model, filters)

        elif key == "award_ids":
            queryset = build_award_ids_filter(queryset, value, ("piid", "fain", "uri"))

        elif key == "program_numbers":
            in_query = [v for v in value]
            if len(in_query) != 0:
                queryset = queryset.filter(cfda_number__in=in_query)

        elif key == "naics_codes":
            if isinstance(value, list):
                require = value
            elif isinstance(value, dict):
                require = value.get("require") or []
                if value.get("exclude"):
                    raise NotImplementedException(
                        "NOT IMPLEMENTED: postgres endpoint does not currently support excluded naics!"
                    )

            else:
                raise InvalidParameterException("naics_codes must be an array or object")

            if [value for value in require if len(str(value)) not in [2, 4, 6]]:
                raise InvalidParameterException(
                    "naics code filtering only supported for codes with lengths of 2, 4, and 6"
                )

            regex = f"^({'|'.join([str(elem) for elem in require])}).*"
            queryset = queryset.filter(naics_code__regex=regex)

        elif key == PSCCodes.underscore_name:
            q = PSCCodes.build_tas_codes_filter(value)
            queryset = queryset.filter(q) if q else queryset

        elif key == "contract_pricing_type_codes":
            in_query = [v for v in value]
            if len(in_query) != 0:
                queryset = queryset.filter(type_of_contract_pricing__in=in_query)

        elif key == "set_aside_type_codes":
            or_queryset = Q()
            for v in value:
                or_queryset |= Q(type_set_aside__exact=v)
            queryset = queryset.filter(or_queryset)

        elif key == "extent_competed_type_codes":
            or_queryset = Q()
            for v in value:
                or_queryset |= Q(extent_competed__exact=v)
            queryset = queryset.filter(or_queryset)

        # Because these two filters OR with each other, we need to know about the presence of both filters to know what to do
        # This filter was picked arbitrarily to be the one that checks for the other
        elif key == TasCodes.underscore_name:
            q = TasCodes.build_tas_codes_filter(queryset, value)
            if TreasuryAccounts.underscore_name in filters.keys():
                q |= TreasuryAccounts.build_tas_codes_filter(queryset, filters[TreasuryAccounts.underscore_name])
            queryset = queryset.filter(q)

        elif key == TreasuryAccounts.underscore_name and TasCodes.underscore_name not in filters.keys():
            queryset = queryset.filter(TreasuryAccounts.build_tas_codes_filter(queryset, value))

        # Federal Account Filter
        elif key == "federal_account_ids":
            faba_flag = True
            or_queryset = Q()
            for v in value:
                or_queryset |= Q(treasury_account__federal_account_id=v)
            faba_queryset = faba_queryset.filter(or_queryset)

        # Federal Account Filter
        elif key == "object_class":
            result = Q()
            for oc in value:
                subresult = Q()
                subresult &= filter_on("award__financial_set__object_class", "object_class", oc)
                result |= subresult
            queryset = queryset.filter(result)

        # Federal Account Filter
        elif key == "program_activity":
            or_queryset = Q()
            for v in value:
                or_queryset |= Q(award__financial_set__program_activity__id=v)
            queryset = queryset.filter(or_queryset)

    if faba_flag:
        award_ids = faba_queryset.values("award_id")
        queryset = queryset.filter(award_id__in=award_ids)

    return queryset
Esempio n. 9
0
def subaward_filter(filters, for_downloads=False):
    queryset = SubawardView.objects.all()

    recipient_scope_q = Q(recipient_location_country_code="USA") | Q(
        recipient_location_country_name="UNITED STATES")
    pop_scope_q = Q(pop_country_code="USA") | Q(
        pop_country_name="UNITED STATES")

    for key, value in filters.items():

        if value is None:
            raise InvalidParameterException("Invalid filter: " + key +
                                            " has null as its value.")

        key_list = [
            "keywords",
            "elasticsearch_keyword",
            "time_period",
            "award_type_codes",
            "prime_and_sub_award_types",
            "agencies",
            "legal_entities",
            "recipient_search_text",
            "recipient_scope",
            "recipient_locations",
            "recipient_type_names",
            "place_of_performance_scope",
            "place_of_performance_locations",
            "award_amounts",
            "award_ids",
            "program_numbers",
            "naics_codes",
            PSCCodes.underscore_name,
            "contract_pricing_type_codes",
            "set_aside_type_codes",
            "extent_competed_type_codes",
            TasCodes.underscore_name,
            TreasuryAccounts.underscore_name,
        ]

        if key not in key_list:
            raise InvalidParameterException("Invalid filter: " + key +
                                            " does not exist.")

        if key == "keywords":

            def keyword_parse(keyword):
                # keyword_ts_vector & award_ts_vector are Postgres TS_vectors.
                # keyword_ts_vector = recipient_name + psc_description + subaward_description
                # award_ts_vector = piid + fain + uri + subaward_number
                filter_obj = Q(keyword_ts_vector=keyword) | Q(
                    award_ts_vector=keyword)
                # Commenting out until NAICS is associated with subawards in DAIMS 1.3.1
                # if keyword.isnumeric():
                #     filter_obj |= Q(naics_code__contains=keyword)
                if len(keyword) == 4 and PSC.objects.filter(
                        code__iexact=keyword).exists():
                    filter_obj |= Q(product_or_service_code__iexact=keyword)

                return filter_obj

            filter_obj = Q()
            for keyword in value:
                filter_obj |= keyword_parse(keyword)
            potential_duns = list(
                filter((lambda x: len(x) > 7 and len(x) < 10), value))
            if len(potential_duns) > 0:
                filter_obj |= Q(recipient_unique_id__in=potential_duns) | Q(
                    parent_recipient_unique_id__in=potential_duns)

            queryset = queryset.filter(filter_obj)

        elif key == "elasticsearch_keyword":
            keyword = value
            transaction_ids = elasticsearch_helper.get_download_ids(
                keyword=keyword, field="transaction_id")
            # flatten IDs
            transaction_ids = list(
                itertools.chain.from_iterable(transaction_ids))
            logger.info("Found {} transactions based on keyword: {}".format(
                len(transaction_ids), keyword))
            transaction_ids = [
                str(transaction_id) for transaction_id in transaction_ids
            ]
            queryset = queryset.filter(latest_transaction_id__isnull=False)

            # Prepare a SQL snippet to include in the predicate for searching an array of transaction IDs
            sql_fragment = '"subaward_view"."latest_transaction_id" = ANY(\'{{{}}}\'::int[])'  # int[] -> int array type
            queryset = queryset.extra(
                where=[sql_fragment.format(",".join(transaction_ids))])

        elif key == "time_period":
            min_date = API_SEARCH_MIN_DATE
            if for_downloads:
                min_date = API_MIN_DATE
            queryset &= combine_date_range_queryset(value, SubawardView,
                                                    min_date, API_MAX_DATE)

        elif key == "award_type_codes":
            queryset = queryset.filter(prime_award_type__in=value)

        elif key == "prime_and_sub_award_types":
            award_types = value.get("sub_awards")
            if award_types:
                queryset = queryset.filter(award_type__in=award_types)

        elif key == "agencies":
            # TODO: Make function to match agencies in award filter throwing dupe error
            funding_toptier = Q()
            funding_subtier = Q()
            awarding_toptier = Q()
            awarding_subtier = Q()
            for v in value:
                type = v["type"]
                tier = v["tier"]
                name = v["name"]
                if type == "funding":
                    if tier == "toptier":
                        funding_toptier |= Q(funding_toptier_agency_name=name)
                    elif tier == "subtier":
                        if "toptier_name" in v:
                            funding_subtier |= Q(
                                funding_subtier_agency_name=name
                            ) & Q(
                                funding_toptier_agency_name=v["toptier_name"])
                        else:
                            funding_subtier |= Q(
                                funding_subtier_agency_name=name)

                elif type == "awarding":
                    if tier == "toptier":
                        awarding_toptier |= Q(
                            awarding_toptier_agency_name=name)
                    elif tier == "subtier":
                        if "toptier_name" in v:
                            awarding_subtier |= Q(
                                awarding_subtier_agency_name=name
                            ) & Q(
                                awarding_toptier_agency_name=v["toptier_name"])
                        else:
                            awarding_subtier |= Q(
                                awarding_subtier_agency_name=name)

            awarding_queryfilter = Q()
            funding_queryfilter = Q()

            # Since these are Q filters, no DB hits for boolean checks
            if funding_toptier:
                funding_queryfilter |= funding_toptier
            if funding_subtier:
                funding_queryfilter |= funding_subtier
            if awarding_toptier:
                awarding_queryfilter |= awarding_toptier
            if awarding_subtier:
                awarding_queryfilter |= awarding_subtier

            queryset = queryset.filter(funding_queryfilter
                                       & awarding_queryfilter)

        elif key == "legal_entities":
            # This filter key has effectively become obsolete by recipient_search_text
            msg = 'API request included "{}" key. No filtering will occur with provided value "{}"'
            logger.info(msg.format(key, value))

        elif key == "recipient_search_text":

            def recip_string_parse(recipient_string):
                upper_recipient_string = recipient_string.upper()

                # recipient_name_ts_vector is a postgres TS_Vector
                filter_obj = Q(recipient_name_ts_vector=upper_recipient_string)
                if len(upper_recipient_string
                       ) == 9 and upper_recipient_string[:5].isnumeric():
                    filter_obj |= Q(recipient_unique_id=upper_recipient_string)
                return filter_obj

            filter_obj = Q()
            for recipient in value:
                filter_obj |= recip_string_parse(recipient)
            queryset = queryset.filter(filter_obj)

        elif key == "recipient_scope":
            if value == "domestic":
                queryset = queryset.filter(recipient_scope_q)
            elif value == "foreign":
                queryset = queryset.exclude(recipient_scope_q)
            else:
                raise InvalidParameterException(
                    "Invalid filter: recipient_scope type is invalid.")

        elif key == "recipient_locations":
            queryset = queryset.filter(
                geocode_filter_locations("recipient_location", value))

        elif key == "recipient_type_names":
            if len(value) != 0:
                queryset = queryset.filter(business_categories__overlap=value)

        elif key == "place_of_performance_scope":
            if value == "domestic":
                queryset = queryset.filter(pop_scope_q)
            elif value == "foreign":
                queryset = queryset.exclude(pop_scope_q)
            else:
                raise InvalidParameterException(
                    "Invalid filter: place_of_performance_scope is invalid.")

        elif key == "place_of_performance_locations":
            queryset = queryset.filter(geocode_filter_locations("pop", value))

        elif key == "award_amounts":
            queryset &= total_obligation_queryset(value, SubawardView, filters)

        elif key == "award_ids":
            queryset = build_award_ids_filter(queryset, value,
                                              ("piid", "fain"))

        elif key == PSCCodes.underscore_name:
            q = PSCCodes.build_tas_codes_filter(value)
            queryset = queryset.filter(q) if q else queryset

        # add "naics_codes" (column naics) after NAICS are mapped to subawards
        elif key in ("program_numbers", "contract_pricing_type_codes"):
            filter_to_col = {
                "program_numbers": "cfda_number",
                "contract_pricing_type_codes": "type_of_contract_pricing",
            }
            in_query = [v for v in value]
            if len(in_query) != 0:
                queryset &= SubawardView.objects.filter(
                    **{"{}__in".format(filter_to_col[key]): in_query})

        elif key in ("set_aside_type_codes", "extent_competed_type_codes"):
            or_queryset = Q()
            filter_to_col = {
                "set_aside_type_codes": "type_set_aside",
                "extent_competed_type_codes": "extent_competed"
            }
            in_query = [v for v in value]
            for v in value:
                or_queryset |= Q(
                    **{"{}__exact".format(filter_to_col[key]): in_query})
            queryset = queryset.filter(or_queryset)

        # Because these two filters OR with each other, we need to know about the presense of both filters to know what to do
        # This filter was picked arbitrarily to be the one that checks for the other
        elif key == TasCodes.underscore_name:
            q = TasCodes.build_tas_codes_filter(queryset, value)
            if TreasuryAccounts.underscore_name in filters.keys():
                q |= TreasuryAccounts.build_tas_codes_filter(
                    queryset, filters[TreasuryAccounts.underscore_name])
            queryset = queryset.filter(q)

        elif key == TreasuryAccounts.underscore_name and TasCodes.underscore_name not in filters.keys(
        ):
            queryset = queryset.filter(
                TreasuryAccounts.build_tas_codes_filter(queryset, value))

    return queryset
Esempio n. 10
0
def subaward_filter(filters, for_downloads=False):

    queryset = SubawardView.objects.all()
    for key, value in filters.items():

        if value is None:
            raise InvalidParameterException('Invalid filter: ' + key +
                                            ' has null as its value.')

        key_list = [
            'keywords', 'elasticsearch_keyword', 'time_period',
            'award_type_codes', 'agencies', 'legal_entities',
            'recipient_search_text', 'recipient_scope', 'recipient_locations',
            'recipient_type_names', 'place_of_performance_scope',
            'place_of_performance_locations', 'award_amounts', 'award_ids',
            'program_numbers', 'naics_codes', 'psc_codes',
            'contract_pricing_type_codes', 'set_aside_type_codes',
            'extent_competed_type_codes'
        ]

        if key not in key_list:
            raise InvalidParameterException('Invalid filter: ' + key +
                                            ' does not exist.')

        if key == "keywords":

            def keyword_parse(keyword):
                # keyword_ts_vector & award_ts_vector are Postgres TS_vectors.
                # keyword_ts_vector = recipient_name + psc_description + subaward_description
                # award_ts_vector = piid + fain + uri + subaward_number
                filter_obj = Q(keyword_ts_vector=keyword) | \
                    Q(award_ts_vector=keyword)
                if keyword.isnumeric():
                    filter_obj |= Q(naics_code__contains=keyword)
                if len(keyword) == 4 and PSC.objects.all().filter(
                        code__iexact=keyword).exists():
                    filter_obj |= Q(product_or_service_code__iexact=keyword)

                return filter_obj

            filter_obj = Q()
            for keyword in value:
                filter_obj |= keyword_parse(keyword)
            potential_duns = list(
                filter((lambda x: len(x) > 7 and len(x) < 10), value))
            if len(potential_duns) > 0:
                filter_obj |= Q(recipient_unique_id__in=potential_duns) | \
                    Q(parent_recipient_unique_id__in=potential_duns)

            queryset = queryset.filter(filter_obj)

        elif key == "elasticsearch_keyword":
            keyword = value
            transaction_ids = elasticsearch_helper.get_download_ids(
                keyword=keyword, field='transaction_id')
            # flatten IDs
            transaction_ids = list(
                itertools.chain.from_iterable(transaction_ids))
            logger.info('Found {} transactions based on keyword: {}'.format(
                len(transaction_ids), keyword))
            transaction_ids = [
                str(transaction_id) for transaction_id in transaction_ids
            ]
            queryset = queryset.filter(latest_transaction_id__isnull=False)
            queryset &= queryset.extra(where=[
                '"latest_transaction_id" = ANY(\'{{{}}}\'::int[])'.format(
                    ','.join(transaction_ids))
            ])

        elif key == "time_period":
            min_date = API_SEARCH_MIN_DATE
            if for_downloads:
                min_date = API_MIN_DATE
            queryset &= combine_date_range_queryset(value, SubawardView,
                                                    min_date, API_MAX_DATE)

        elif key == "award_type_codes":
            idv_flag = all(i in value for i in contract_type_mapping.keys())

            if len(value) != 0:
                filter_obj = Q(prime_award_type__in=value)
                if idv_flag:
                    filter_obj |= Q(pulled_from='IDV')
                queryset &= SubawardView.objects.filter(filter_obj)

        elif key == "agencies":
            # TODO: Make function to match agencies in award filter throwing dupe error
            funding_toptier = Q()
            funding_subtier = Q()
            awarding_toptier = Q()
            awarding_subtier = Q()
            for v in value:
                type = v["type"]
                tier = v["tier"]
                name = v["name"]
                if type == "funding":
                    if tier == "toptier":
                        funding_toptier |= Q(funding_toptier_agency_name=name)
                    elif tier == "subtier":
                        if 'toptier_name' in v:
                            funding_subtier |= (
                                Q(funding_subtier_agency_name=name) &
                                Q(funding_toptier_agency_name=v['toptier_name']
                                  ))
                        else:
                            funding_subtier |= Q(
                                funding_subtier_agency_name=name)

                elif type == "awarding":
                    if tier == "toptier":
                        awarding_toptier |= Q(
                            awarding_toptier_agency_name=name)
                    elif tier == "subtier":
                        if 'toptier_name' in v:
                            awarding_subtier |= (Q(
                                awarding_subtier_agency_name=name
                            ) & Q(
                                awarding_toptier_agency_name=v['toptier_name'])
                                                 )
                        else:
                            awarding_subtier |= Q(
                                awarding_subtier_agency_name=name)

            awarding_queryfilter = Q()
            funding_queryfilter = Q()

            # Since these are Q filters, no DB hits for boolean checks
            if funding_toptier:
                funding_queryfilter |= funding_toptier
            if funding_subtier:
                funding_queryfilter |= funding_subtier
            if awarding_toptier:
                awarding_queryfilter |= awarding_toptier
            if awarding_subtier:
                awarding_queryfilter |= awarding_subtier

            queryset = queryset.filter(funding_queryfilter
                                       & awarding_queryfilter)

        elif key == "legal_entities":
            # This filter key has effectively become obsolete by recipient_search_text
            msg = 'API request included "{}" key. No filtering will occur with provided value "{}"'
            logger.info(msg.format(key, value))

        elif key == "recipient_search_text":

            def recip_string_parse(recipient_string):
                upper_recipient_string = recipient_string.upper()

                # recipient_name_ts_vector is a postgres TS_Vector
                filter_obj = Q(recipient_name_ts_vector=upper_recipient_string)
                if len(upper_recipient_string
                       ) == 9 and upper_recipient_string[:5].isnumeric():
                    filter_obj |= Q(recipient_unique_id=upper_recipient_string)
                return filter_obj

            filter_obj = Q()
            for recipient in value:
                filter_obj |= recip_string_parse(recipient)
            queryset &= SubawardView.objects.filter(filter_obj)

        elif key == "recipient_scope":
            if value == "domestic":
                queryset = queryset.filter(
                    recipient_location_country_name="UNITED STATES")
            elif value == "foreign":
                queryset = queryset.exclude(
                    recipient_location_country_name="UNITED STATES")
            else:
                raise InvalidParameterException(
                    'Invalid filter: recipient_scope type is invalid.')

        elif key == "recipient_locations":
            or_queryset = geocode_filter_locations('recipient_location', value,
                                                   SubawardView, True)
            queryset &= or_queryset

        elif key == "recipient_type_names":
            if len(value) != 0:
                queryset &= SubawardView.objects.filter(
                    business_categories__overlap=value)

        elif key == "place_of_performance_scope":
            if value == "domestic":
                queryset = queryset.filter(
                    Q(pop_country_name="UNITED STATES")
                    | Q(pop_country_code="USA"))
            elif value == "foreign":
                queryset = queryset.exclude(
                    Q(pop_country_name="UNITED STATES")
                    | Q(pop_country_code="USA"))
            else:
                raise InvalidParameterException(
                    'Invalid filter: place_of_performance_scope is invalid.')

        elif key == "place_of_performance_locations":
            queryset &= geocode_filter_locations('pop', value, SubawardView,
                                                 True)

        elif key == "award_amounts":
            queryset &= total_obligation_queryset(value, SubawardView, filters)

        elif key == "award_ids":
            filter_obj = Q()
            for val in value:
                # award_id_string is a Postgres TS_vector
                # award_id_string = piid + fain + uri + subaward_number
                filter_obj |= Q(award_ts_vector=val)
            queryset &= SubawardView.objects.filter(filter_obj)

        # add "naics_codes" (column naics) after NAICS are mapped to subawards
        elif key in ("program_numbers", "psc_codes",
                     "contract_pricing_type_codes"):
            filter_to_col = {
                "program_numbers": "cfda_number",
                "psc_codes": "product_or_service_code",
                "contract_pricing_type_codes": "type_of_contract_pricing",
            }
            in_query = [v for v in value]
            if len(in_query) != 0:
                queryset &= SubawardView.objects.filter(
                    **{'{}__in'.format(filter_to_col[key]): in_query})

        elif key in ("set_aside_type_codes", "extent_competed_type_codes"):
            or_queryset = Q()
            filter_to_col = {
                "set_aside_type_codes": "type_set_aside",
                "extent_competed_type_codes": "extent_competed",
            }
            in_query = [v for v in value]
            for v in value:
                or_queryset |= Q(
                    **{'{}__exact'.format(filter_to_col[key]): in_query})
            queryset = queryset.filter(or_queryset)

    return queryset
Esempio n. 11
0
def subaward_filter(filters):

    queryset = Subaward.objects.filter(
        award__latest_transaction_id__isnull=False,
        award__category__isnull=False)
    for key, value in filters.items():

        if value is None:
            raise InvalidParameterException('Invalid filter: ' + key +
                                            ' has null as its value.')

        key_list = [
            'keyword', 'elasticsearch_keyword', 'time_period',
            'award_type_codes', 'agencies', 'legal_entities',
            'recipient_search_text', 'recipient_scope', 'recipient_locations',
            'recipient_type_names', 'place_of_performance_scope',
            'place_of_performance_locations', 'award_amounts', 'award_ids',
            'program_numbers', 'naics_codes', 'psc_codes',
            'contract_pricing_type_codes', 'set_aside_type_codes',
            'extent_competed_type_codes'
        ]

        if key not in key_list:
            raise InvalidParameterException('Invalid filter: ' + key +
                                            ' does not exist.')

        if key == "keyword":
            keyword = value  # alias

            # description_match = False
            # description_qs = queryset.filter(description__icontains=keyword)
            # if description_qs.exists():
            #     description_match = True

            recipient_match = False
            recipient_list = LegalEntity.objects.all().values(
                'legal_entity_id').filter(recipient_name__icontains=keyword)
            if recipient_list.exists():
                recipient_match = True
                recipient_qs = queryset.filter(recipient__in=recipient_list)

            naics_match = False
            if keyword.isnumeric():
                naics_list = NAICS.objects.all().filter(
                    code__icontains=keyword).values('code')
            else:
                naics_list = NAICS.objects.all().filter(
                    description__icontains=keyword).values('code')
            if naics_list.exists():
                naics_match = True
                naics_qs = queryset.filter(
                    award__latest_transaction__contract_data__naics__in=
                    naics_list)

            psc_match = False
            if len(keyword) == 4 and PSC.objects.all().filter(
                    code=keyword).exists():
                psc_list = PSC.objects.all().filter(
                    code=keyword).values('code')
            else:
                psc_list = PSC.objects.all().filter(
                    description__icontains=keyword).values('code')
            if psc_list.exists():
                psc_match = True
                psc_qs = queryset.filter(
                    award__latest_transaction__contract_data__product_or_service_code__in
                    =psc_list)

            duns_match = False
            non_parent_duns_list = LegalEntity.objects.all().values(
                'legal_entity_id').filter(recipient_unique_id=keyword)
            parent_duns_list = LegalEntity.objects.all().values(
                'legal_entity_id').filter(parent_recipient_unique_id=keyword)
            duns_list = non_parent_duns_list | parent_duns_list
            if duns_list.exists():
                duns_match = True
                duns_qs = queryset.filter(recipient__in=duns_list)

            piid_qs = queryset.filter(award__piid=keyword)
            fain_qs = queryset.filter(award__fain=keyword)

            # Always filter on fain/piid because fast:
            queryset = piid_qs
            queryset |= fain_qs
            # if description_match:
            #     queryset |= description_qs
            if recipient_match:
                queryset |= recipient_qs
            if naics_match:
                queryset |= naics_qs
            if psc_match:
                queryset |= psc_qs
            if duns_match:
                queryset |= duns_qs

        elif key == "elasticsearch_keyword":
            keyword = value
            transaction_ids = elasticsearch_helper.get_download_ids(
                keyword=keyword, field='transaction_id')
            # flatten IDs
            transaction_ids = list(
                itertools.chain.from_iterable(transaction_ids))
            logger.info('Found {} transactions based on keyword: {}'.format(
                len(transaction_ids), keyword))
            transaction_ids = [
                str(transaction_id) for transaction_id in transaction_ids
            ]
            queryset = queryset.filter(
                award__latest_transaction__id__isnull=False)
            queryset &= queryset.extra(where=[
                '"transaction_normalized"."id" = ANY(\'{{{}}}\'::int[])'.
                format(','.join(transaction_ids))
            ])

        elif key == "time_period":
            or_queryset = None
            queryset_init = False
            for v in value:
                date_type = v.get("date_type", "action_date")
                if date_type not in ["action_date", "last_modified_date"]:
                    raise InvalidParameterException(
                        'Invalid date_type: {}'.format(date_type))
                date_field = (
                    date_type if date_type == 'action_date' else
                    'award__latest_transaction__{}'.format(date_type))
                kwargs = {}
                if v.get("start_date") is not None:
                    kwargs["{}__gte".format(date_field)] = v.get("start_date")
                if v.get("end_date") is not None:
                    kwargs["{}__lte".format(date_field)] = v.get("end_date")
                # (may have to cast to date) (oct 1 to sept 30)
                if queryset_init:
                    or_queryset |= Subaward.objects.filter(**kwargs)
                else:
                    queryset_init = True
                    or_queryset = Subaward.objects.filter(**kwargs)
            if queryset_init:
                queryset &= or_queryset

        elif key == "award_type_codes":
            or_queryset = []

            idv_flag = all(i in value for i in ['A', 'B', 'C', 'D'])

            for v in value:
                or_queryset.append(v)
            if len(or_queryset) != 0:
                filter_obj = Q(award__type__in=or_queryset)
                if idv_flag:
                    filter_obj |= Q(
                        award__latest_transaction__contract_data__pulled_from=
                        'IDV')
                queryset &= Subaward.objects.filter(filter_obj)

        elif key == "agencies":
            # TODO: Make function to match agencies in award filter throwing dupe error
            or_queryset = None
            funding_toptier = []
            funding_subtier = []
            awarding_toptier = []
            awarding_subtier = []
            for v in value:
                type = v["type"]
                tier = v["tier"]
                name = v["name"]
                if type == "funding":
                    if tier == "toptier":
                        funding_toptier.append(name)
                    elif tier == "subtier":
                        funding_subtier.append(name)
                    else:
                        raise InvalidParameterException(
                            'Invalid filter: agencies ' + tier +
                            ' tier is invalid.')
                elif type == "awarding":
                    if tier == "toptier":
                        awarding_toptier.append(name)
                    elif tier == "subtier":
                        awarding_subtier.append(name)
                    else:
                        raise InvalidParameterException(
                            'Invalid filter: agencies ' + tier +
                            ' tier is invalid.')
                else:
                    raise InvalidParameterException(
                        'Invalid filter: agencies ' + type +
                        ' type is invalid.')
            if len(funding_toptier) != 0:
                queryset &= Subaward.objects.filter(
                    funding_agency__toptier_agency__name__in=funding_toptier)
            if len(funding_subtier) != 0:
                queryset &= Subaward.objects.filter(
                    funding_agency__subtier_agency__name__in=funding_subtier)
            if len(awarding_toptier) != 0:
                queryset &= Subaward.objects.filter(
                    awarding_agency__toptier_agency__name__in=awarding_toptier)
            if len(awarding_subtier) != 0:
                queryset &= Subaward.objects.filter(
                    awarding_agency__subtier_agency__name__in=awarding_subtier)

        elif key == "legal_entities":
            or_queryset = []
            for v in value:
                or_queryset.append(v)
            if len(or_queryset) != 0:
                queryset &= Subaward.objects.filter(
                    recipient__legal_entity_id__in=or_queryset)

        elif key == "recipient_search_text":
            if len(value) != 1:
                raise InvalidParameterException(
                    'Invalid filter: recipient_search_text must have exactly one value.'
                )
            recipient_string = str(value[0])

            filter_obj = Q(
                recipient__recipient_name__icontains=recipient_string)

            if len(recipient_string) == 9:
                filter_obj |= Q(
                    recipient__recipient_unique_id__iexact=recipient_string)

            queryset &= Subaward.objects.filter(filter_obj)

        elif key == "recipient_scope":
            if value == "domestic":
                queryset = queryset.filter(
                    recipient__location__country_name="UNITED STATES")
            elif value == "foreign":
                queryset = queryset.exclude(
                    recipient__location__country_name="UNITED STATES")
            else:
                raise InvalidParameterException(
                    'Invalid filter: recipient_scope type is invalid.')

        elif key == "recipient_locations":
            or_queryset = geocode_filter_locations('recipient__location',
                                                   value, 'Subaward')
            queryset &= or_queryset

        elif key == "recipient_type_names":
            if len(value) != 0:
                queryset &= Subaward.objects.filter(
                    recipient__business_categories__overlap=value)

        elif key == "place_of_performance_scope":
            if value == "domestic":
                queryset = queryset.filter(
                    place_of_performance__country_name="UNITED STATES")
            elif value == "foreign":
                queryset = queryset.exclude(
                    place_of_performance__country_name="UNITED STATES")
            else:
                raise InvalidParameterException(
                    'Invalid filter: place_of_performance_scope is invalid.')

        elif key == "place_of_performance_locations":
            or_queryset = geocode_filter_locations('place_of_performance',
                                                   value, 'Subaward')

            queryset &= or_queryset

        elif key == "award_amounts":
            or_queryset = None
            queryset_init = False
            for v in value:
                if v.get("lower_bound") is not None and v.get(
                        "upper_bound") is not None:
                    if queryset_init:
                        or_queryset |= Subaward.objects.filter(
                            amount__gt=v["lower_bound"],
                            amount__lt=v["upper_bound"])
                    else:
                        queryset_init = True
                        or_queryset = Subaward.objects.filter(
                            amount__gt=v["lower_bound"],
                            amount__lt=v["upper_bound"])
                elif v.get("lower_bound") is not None:
                    if queryset_init:
                        or_queryset |= Subaward.objects.filter(
                            amount__gt=v["lower_bound"])
                    else:
                        queryset_init = True
                        or_queryset = Subaward.objects.filter(
                            amount__gt=v["lower_bound"])
                elif v.get("upper_bound") is not None:
                    if queryset_init:
                        or_queryset |= Subaward.objects.filter(
                            amount__lt=v["upper_bound"])
                    else:
                        queryset_init = True
                        or_queryset = Subaward.objects.filter(
                            amount__lt=v["upper_bound"])
                else:
                    raise InvalidParameterException(
                        'Invalid filter: award amount has incorrect object.')
            if queryset_init:
                queryset &= or_queryset

        elif key == "award_ids":
            if len(value) != 0:
                filter_obj = Q()
                for val in value:
                    filter_obj |= (Q(award__piid__icontains=val)
                                   | Q(award__fain__icontains=val)
                                   | Q(award__uri__icontains=val))

                queryset &= Subaward.objects.filter(filter_obj)

        elif key == "program_numbers":
            or_queryset = []
            for v in value:
                or_queryset.append(v)
            if len(or_queryset) != 0:
                queryset &= Subaward.objects.filter(
                    award__latest_transaction__assistance_data__cfda_number__in
                    =or_queryset)

        elif key == "naics_codes":
            or_queryset = []
            for v in value:
                or_queryset.append(v)
            if len(or_queryset) != 0:
                queryset &= Subaward.objects.filter(
                    award__latest_transaction__contract_data__naics__in=
                    or_queryset)

        elif key == "psc_codes":
            or_queryset = []
            for v in value:
                or_queryset.append(v)
            if len(or_queryset) != 0:
                queryset &= Subaward.objects.filter(
                    award__latest_transaction__contract_data__product_or_service_code__in
                    =or_queryset)

        elif key == "contract_pricing_type_codes":
            or_queryset = []
            for v in value:
                or_queryset.append(v)
            if len(or_queryset) != 0:
                queryset &= Subaward.objects.filter(
                    award__latest_transaction__contract_data__type_of_contract_pricing__in
                    =or_queryset)

        elif key == "set_aside_type_codes":
            or_queryset = []
            for v in value:
                or_queryset.append(v)
            if len(or_queryset) != 0:
                queryset &= Subaward.objects.filter(
                    award__latest_transaction__contract_data__type_set_aside__in
                    =or_queryset)

        elif key == "extent_competed_type_codes":
            or_queryset = []
            for v in value:
                or_queryset.append(v)
            if len(or_queryset) != 0:
                queryset &= Subaward.objects.filter(
                    award__latest_transaction__contract_data__extent_competed__in
                    =or_queryset)

    return queryset
Esempio n. 12
0
def matview_search_filter(filters, model):
    queryset = model.objects.all()

    faba_flag = False
    faba_queryset = FinancialAccountsByAwards.objects.filter(
        award__isnull=False)

    for key, value in filters.items():
        if value is None:
            raise InvalidParameterException('Invalid filter: ' + key +
                                            ' has null as its value.')

        key_list = [
            'keyword',
            'time_period',
            'award_type_codes',
            'agencies',
            'legal_entities',
            'recipient_search_text',
            'recipient_scope',
            'recipient_locations',
            'recipient_type_names',
            'place_of_performance_scope',
            'place_of_performance_locations',
            'award_amounts',
            'award_ids',
            'program_numbers',
            'naics_codes',
            'psc_codes',
            'contract_pricing_type_codes',
            'set_aside_type_codes',
            'extent_competed_type_codes',
            # next 3 keys used by federal account page
            'federal_account_ids',
            'object_class',
            'program_activity'
        ]

        if key not in key_list:
            raise InvalidParameterException('Invalid filter: ' + key +
                                            ' does not exist.')

        if key == "keyword":
            keyword = value
            upper_kw = keyword.upper()

            # keyword_string & award_id_string are Postgres TS_vectors.
            # keyword_string = recipient_name + naics_code + naics_description + psc_description + awards_description
            # award_id_string = piid + fain + uri
            compound_or = Q(keyword_ts_vector=keyword) | \
                Q(award_ts_vector=keyword) | \
                Q(recipient_unique_id=upper_kw) | \
                Q(parent_recipient_unique_id=keyword)

            if keyword.isnumeric():
                compound_or |= Q(naics_code__contains=keyword)

            if len(keyword) == 4 and PSC.objects.all().filter(
                    code__iexact=keyword).exists():
                compound_or |= Q(product_or_service_code__iexact=keyword)

            queryset = queryset.filter(compound_or)

        elif key == "time_period":
            success, or_queryset = date_or_fy_queryset(value, model,
                                                       "fiscal_year",
                                                       "action_date")
            if success:
                queryset &= or_queryset

        elif key == "award_type_codes":
            idv_flag = all(i in value for i in contract_type_mapping.keys())

            if len(value) != 0:
                filter_obj = Q(type__in=value)
                if idv_flag:
                    filter_obj |= Q(pulled_from='IDV')
                queryset &= model.objects.filter(filter_obj)

        elif key == "agencies":
            # TODO: Make function to match agencies in award filter throwing dupe error
            funding_toptier = Q()
            funding_subtier = Q()
            awarding_toptier = Q()
            awarding_subtier = Q()
            for v in value:
                type = v["type"]
                tier = v["tier"]
                name = v["name"]
                if type == "funding":
                    if tier == "toptier":
                        funding_toptier |= Q(funding_toptier_agency_name=name)
                    elif tier == "subtier":
                        if 'toptier_name' in v:
                            funding_subtier |= (
                                Q(funding_subtier_agency_name=name) &
                                Q(funding_toptier_agency_name=v['toptier_name']
                                  ))
                        else:
                            funding_subtier |= Q(
                                funding_subtier_agency_name=name)

                    else:
                        raise InvalidParameterException(
                            'Invalid filter: agencies ' + tier +
                            ' tier is invalid.')
                elif type == "awarding":
                    if tier == "toptier":
                        awarding_toptier |= Q(
                            awarding_toptier_agency_name=name)
                    elif tier == "subtier":
                        if 'toptier_name' in v:
                            awarding_subtier |= (Q(
                                awarding_subtier_agency_name=name
                            ) & Q(
                                awarding_toptier_agency_name=v['toptier_name'])
                                                 )
                        else:
                            awarding_subtier |= Q(
                                awarding_subtier_agency_name=name)
                    else:
                        raise InvalidParameterException(
                            'Invalid filter: agencies ' + tier +
                            ' tier is invalid.')
                else:
                    raise InvalidParameterException(
                        'Invalid filter: agencies ' + type +
                        ' type is invalid.')

            awarding_queryfilter = Q()
            funding_queryfilter = Q()

            # Since these are Q filters, no DB hits for boolean checks
            if funding_toptier:
                funding_queryfilter |= funding_toptier
            if funding_subtier:
                funding_queryfilter |= funding_subtier
            if awarding_toptier:
                awarding_queryfilter |= awarding_toptier
            if awarding_subtier:
                awarding_queryfilter |= awarding_subtier

            queryset = queryset.filter(funding_queryfilter
                                       & awarding_queryfilter)

        elif key == "legal_entities":
            in_query = [v for v in value]
            if len(in_query) != 0:
                queryset &= model.objects.filter(recipient_id__in=in_query)

        elif key == "recipient_search_text":
            if len(value) != 1:
                raise InvalidParameterException(
                    'Invalid filter: recipient_search_text must have exactly one value.'
                )
            upper_recipient_string = str(value[0]).upper()

            # recipient_name_ts_vector is a postgres TS_Vector
            filter_obj = Q(recipient_name_ts_vector=upper_recipient_string)

            if len(upper_recipient_string
                   ) == 9 and upper_recipient_string[:5].isnumeric():
                filter_obj |= Q(recipient_unique_id=upper_recipient_string)

            queryset &= model.objects.filter(filter_obj)

        elif key == "recipient_scope":
            if value == "domestic":
                queryset = queryset.filter(
                    recipient_location_country_name="UNITED STATES")
            elif value == "foreign":
                queryset = queryset.exclude(
                    recipient_location_country_name="UNITED STATES")
            else:
                raise InvalidParameterException(
                    'Invalid filter: recipient_scope type is invalid.')

        elif key == "recipient_locations":
            or_queryset = geocode_filter_locations('recipient_location', value,
                                                   model, True)
            queryset &= or_queryset

        elif key == "recipient_type_names":
            if len(value) != 0:
                queryset &= model.objects.filter(
                    business_categories__overlap=value)

        elif key == "place_of_performance_scope":
            if value == "domestic":
                queryset = queryset.filter(pop_country_name="UNITED STATES")
            elif value == "foreign":
                queryset = queryset.exclude(pop_country_name="UNITED STATES")
            else:
                raise InvalidParameterException(
                    'Invalid filter: place_of_performance_scope is invalid.')

        elif key == "place_of_performance_locations":
            queryset &= geocode_filter_locations('pop', value, model, True)

        elif key == "award_amounts":
            success, and_queryset = total_obligation_queryset(
                value, model, filters)
            if success:
                queryset &= and_queryset

        elif key == "award_ids":
            if len(value) != 0:
                filter_obj = Q()
                for val in value:
                    # award_id_string is a Postgres TS_vector
                    # award_id_string = piid + fain + uri
                    filter_obj |= Q(award_ts_vector=val)
                queryset &= model.objects.filter(filter_obj)

        elif key == "program_numbers":
            in_query = [v for v in value]
            if len(in_query) != 0:
                queryset &= model.objects.filter(cfda_number__in=in_query)

        elif key == "naics_codes":
            in_query = [v for v in value]
            if len(in_query) != 0:
                queryset &= model.objects.filter(naics_code__in=in_query)

        elif key == "psc_codes":
            in_query = [v for v in value]
            if len(in_query) != 0:
                queryset &= model.objects.filter(
                    product_or_service_code__in=in_query)

        elif key == "contract_pricing_type_codes":
            in_query = [v for v in value]
            if len(in_query) != 0:
                queryset &= model.objects.filter(
                    type_of_contract_pricing__in=in_query)

        elif key == "set_aside_type_codes":
            or_queryset = Q()
            for v in value:
                or_queryset |= Q(type_set_aside__exact=v)
            queryset = queryset.filter(or_queryset)

        elif key == "extent_competed_type_codes":
            or_queryset = Q()
            for v in value:
                or_queryset |= Q(extent_competed__exact=v)
            queryset = queryset.filter(or_queryset)

        # Federal Account Filter
        elif key == "federal_account_ids":
            faba_flag = True
            or_queryset = Q()
            for v in value:
                or_queryset |= Q(treasury_account__federal_account_id=v)
            faba_queryset = faba_queryset.filter(or_queryset)

        # Federal Account Filter
        elif key == "object_class":
            faba_flag = True
            result = Q()
            for oc in value:
                subresult = Q()
                for (key, values) in oc.items():
                    subresult &= filter_on(
                        "treasury_account__program_balances__object_class",
                        key, values)
                result |= subresult
            faba_queryset = faba_queryset.filter(result)

        # Federal Account Filter
        elif key == "program_activity":
            faba_flag = True
            or_queryset = Q()
            for v in value:
                or_queryset |= Q(
                    treasury_account__program_balances__program_activity__program_activity_code
                    =v)
            faba_queryset = faba_queryset.filter(or_queryset)

    if faba_flag:
        award_ids = faba_queryset.values('award_id')
        queryset = queryset.filter(award_id__in=award_ids)

    return queryset
def subaward_filter(filters, for_downloads=False):

    queryset = SubawardView.objects.all()

    recipient_scope_q = Q(recipient_location_country_code="USA") | Q(recipient_location_country_name="UNITED STATES")
    pop_scope_q = Q(pop_country_code="USA") | Q(pop_country_name="UNITED STATES")

    for key, value in filters.items():

        if value is None:
            raise InvalidParameterException('Invalid filter: ' + key + ' has null as its value.')

        key_list = [
            'keywords',
            'elasticsearch_keyword',
            'time_period',
            'award_type_codes',
            'agencies',
            'legal_entities',
            'recipient_search_text',
            'recipient_scope',
            'recipient_locations',
            'recipient_type_names',
            'place_of_performance_scope',
            'place_of_performance_locations',
            'award_amounts',
            'award_ids',
            'program_numbers',
            'naics_codes',
            'psc_codes',
            'contract_pricing_type_codes',
            'set_aside_type_codes',
            'extent_competed_type_codes'
        ]

        if key not in key_list:
            raise InvalidParameterException('Invalid filter: ' + key + ' does not exist.')

        if key == "keywords":
            def keyword_parse(keyword):
                # keyword_ts_vector & award_ts_vector are Postgres TS_vectors.
                # keyword_ts_vector = recipient_name + psc_description + subaward_description
                # award_ts_vector = piid + fain + uri + subaward_number
                filter_obj = Q(keyword_ts_vector=keyword) | \
                    Q(award_ts_vector=keyword)
                # Commenting out until NAICS is associated with subawards in DAIMS 1.3.1
                # if keyword.isnumeric():
                #     filter_obj |= Q(naics_code__contains=keyword)
                if len(keyword) == 4 and PSC.objects.filter(code__iexact=keyword).exists():
                    filter_obj |= Q(product_or_service_code__iexact=keyword)

                return filter_obj

            filter_obj = Q()
            for keyword in value:
                filter_obj |= keyword_parse(keyword)
            potential_duns = list(filter((lambda x: len(x) > 7 and len(x) < 10), value))
            if len(potential_duns) > 0:
                filter_obj |= Q(recipient_unique_id__in=potential_duns) | \
                    Q(parent_recipient_unique_id__in=potential_duns)

            queryset = queryset.filter(filter_obj)

        elif key == "elasticsearch_keyword":
            keyword = value
            transaction_ids = elasticsearch_helper.get_download_ids(keyword=keyword, field='transaction_id')
            # flatten IDs
            transaction_ids = list(itertools.chain.from_iterable(transaction_ids))
            logger.info('Found {} transactions based on keyword: {}'.format(len(transaction_ids), keyword))
            transaction_ids = [str(transaction_id) for transaction_id in transaction_ids]
            queryset = queryset.filter(latest_transaction_id__isnull=False)

            # Prepare a SQL snippet to include in the predicate for searching an array of transaction IDs
            sql_fragment = '"subaward_view"."latest_transaction_id" = ANY(\'{{{}}}\'::int[])'  # int[] -> int array type
            queryset &= queryset.extra(where=[sql_fragment.format(','.join(transaction_ids))])

        elif key == "time_period":
            min_date = API_SEARCH_MIN_DATE
            if for_downloads:
                min_date = API_MIN_DATE
            queryset &= combine_date_range_queryset(value, SubawardView, min_date, API_MAX_DATE)

        elif key == "award_type_codes":
            queryset &= queryset.filter(Q(prime_award_type__in=value))

        elif key == "agencies":
            # TODO: Make function to match agencies in award filter throwing dupe error
            funding_toptier = Q()
            funding_subtier = Q()
            awarding_toptier = Q()
            awarding_subtier = Q()
            for v in value:
                type = v["type"]
                tier = v["tier"]
                name = v["name"]
                if type == "funding":
                    if tier == "toptier":
                        funding_toptier |= Q(funding_toptier_agency_name=name)
                    elif tier == "subtier":
                        if 'toptier_name' in v:
                            funding_subtier |= (Q(funding_subtier_agency_name=name) &
                                                Q(funding_toptier_agency_name=v['toptier_name']))
                        else:
                            funding_subtier |= Q(funding_subtier_agency_name=name)

                elif type == "awarding":
                    if tier == "toptier":
                        awarding_toptier |= Q(awarding_toptier_agency_name=name)
                    elif tier == "subtier":
                        if 'toptier_name' in v:
                            awarding_subtier |= (Q(awarding_subtier_agency_name=name) &
                                                 Q(awarding_toptier_agency_name=v['toptier_name']))
                        else:
                            awarding_subtier |= Q(awarding_subtier_agency_name=name)

            awarding_queryfilter = Q()
            funding_queryfilter = Q()

            # Since these are Q filters, no DB hits for boolean checks
            if funding_toptier:
                funding_queryfilter |= funding_toptier
            if funding_subtier:
                funding_queryfilter |= funding_subtier
            if awarding_toptier:
                awarding_queryfilter |= awarding_toptier
            if awarding_subtier:
                awarding_queryfilter |= awarding_subtier

            queryset = queryset.filter(funding_queryfilter & awarding_queryfilter)

        elif key == "legal_entities":
            # This filter key has effectively become obsolete by recipient_search_text
            msg = 'API request included "{}" key. No filtering will occur with provided value "{}"'
            logger.info(msg.format(key, value))

        elif key == "recipient_search_text":
            def recip_string_parse(recipient_string):
                upper_recipient_string = recipient_string.upper()

                # recipient_name_ts_vector is a postgres TS_Vector
                filter_obj = Q(recipient_name_ts_vector=upper_recipient_string)
                if len(upper_recipient_string) == 9 and upper_recipient_string[:5].isnumeric():
                    filter_obj |= Q(recipient_unique_id=upper_recipient_string)
                return filter_obj

            filter_obj = Q()
            for recipient in value:
                filter_obj |= recip_string_parse(recipient)
            queryset &= queryset.filter(filter_obj)

        elif key == "recipient_scope":
            if value == "domestic":
                queryset = queryset.filter(recipient_scope_q)
            elif value == "foreign":
                queryset = queryset.exclude(recipient_scope_q)
            else:
                raise InvalidParameterException('Invalid filter: recipient_scope type is invalid.')

        elif key == "recipient_locations":
            queryset = queryset.filter(geocode_filter_locations('recipient_location', value, True))

        elif key == "recipient_type_names":
            if len(value) != 0:
                queryset &= queryset.filter(Q(business_categories__overlap=value))

        elif key == "place_of_performance_scope":
            if value == "domestic":
                queryset = queryset.filter(pop_scope_q)
            elif value == "foreign":
                queryset = queryset.exclude(pop_scope_q)
            else:
                raise InvalidParameterException('Invalid filter: place_of_performance_scope is invalid.')

        elif key == "place_of_performance_locations":
            queryset = queryset.filter(geocode_filter_locations('pop', value, True))

        elif key == "award_amounts":
            queryset &= total_obligation_queryset(value, SubawardView, filters)

        elif key == "award_ids":
            filter_obj = Q()
            for val in value:
                # award_id_string is a Postgres TS_vector
                # award_id_string = piid + fain + uri + subaward_number
                filter_obj |= Q(award_ts_vector=val)
            queryset &= queryset.filter(filter_obj)

        # add "naics_codes" (column naics) after NAICS are mapped to subawards
        elif key in ("program_numbers", "psc_codes", "contract_pricing_type_codes"):
            filter_to_col = {
                "program_numbers": "cfda_number",
                "psc_codes": "product_or_service_code",
                "contract_pricing_type_codes": "type_of_contract_pricing",
            }
            in_query = [v for v in value]
            if len(in_query) != 0:
                queryset &= SubawardView.objects.filter(**{'{}__in'.format(filter_to_col[key]): in_query})

        elif key in ("set_aside_type_codes", "extent_competed_type_codes"):
            or_queryset = Q()
            filter_to_col = {
                "set_aside_type_codes": "type_set_aside",
                "extent_competed_type_codes": "extent_competed",
            }
            in_query = [v for v in value]
            for v in value:
                or_queryset |= Q(**{'{}__exact'.format(filter_to_col[key]): in_query})
            queryset = queryset.filter(or_queryset)

    return queryset
def matview_search_filter(filters, model, for_downloads=False):
    queryset = model.objects.all()

    recipient_scope_q = Q(recipient_location_country_code="USA") | Q(recipient_location_country_name="UNITED STATES")
    pop_scope_q = Q(pop_country_code="USA") | Q(pop_country_name="UNITED STATES")

    faba_flag = False
    faba_queryset = FinancialAccountsByAwards.objects.filter(award__isnull=False)

    for key, value in filters.items():
        if value is None:
            raise InvalidParameterException('Invalid filter: ' + key + ' has null as its value.')

        key_list = [
            'keywords',
            'elasticsearch_keyword',
            'time_period',
            'award_type_codes',
            'agencies',
            'legal_entities',
            'recipient_id',
            'recipient_search_text',
            'recipient_scope',
            'recipient_locations',
            'recipient_type_names',
            'place_of_performance_scope',
            'place_of_performance_locations',
            'award_amounts',
            'award_ids',
            'program_numbers',
            'naics_codes',
            'psc_codes',
            'contract_pricing_type_codes',
            'set_aside_type_codes',
            'extent_competed_type_codes',
            # next 3 keys used by federal account page
            'federal_account_ids',
            'object_class',
            'program_activity'
        ]

        if key not in key_list:
            raise InvalidParameterException('Invalid filter: ' + key + ' does not exist.')

        if key == "keywords":
            def keyword_parse(keyword):
                # keyword_ts_vector & award_ts_vector are Postgres TS_vectors.
                # keyword_ts_vector = recipient_name + naics_code + naics_description
                #     + psc_description + awards_description
                # award_ts_vector = piid + fain + uri
                filter_obj = Q(keyword_ts_vector=keyword) | \
                    Q(award_ts_vector=keyword)
                if keyword.isnumeric():
                    filter_obj |= Q(naics_code__contains=keyword)
                if len(keyword) == 4 and PSC.objects.all().filter(code__iexact=keyword).exists():
                    filter_obj |= Q(product_or_service_code__iexact=keyword)

                return filter_obj

            filter_obj = Q()
            for keyword in value:
                filter_obj |= keyword_parse(keyword)
            potential_duns = list(filter((lambda x: len(x) > 7 and len(x) < 10), value))
            if len(potential_duns) > 0:
                filter_obj |= Q(recipient_unique_id__in=potential_duns) | \
                    Q(parent_recipient_unique_id__in=potential_duns)

            queryset = queryset.filter(filter_obj)

        elif key == "elasticsearch_keyword":
            keyword = " ".join(value) if isinstance(value, list) else value
            transaction_ids = elasticsearch_helper.get_download_ids(keyword=keyword, field='transaction_id')
            # flatten IDs
            transaction_ids = list(itertools.chain.from_iterable(transaction_ids))
            logger.info('Found {} transactions based on keyword: {}'.format(len(transaction_ids), keyword))
            transaction_ids = [str(transaction_id) for transaction_id in transaction_ids]
            queryset &= queryset.extra(
                where=['"transaction_normalized"."id" = ANY(\'{{{}}}\'::int[])'.format(','.join(transaction_ids))])

        elif key == "time_period":
            min_date = API_SEARCH_MIN_DATE
            if for_downloads:
                min_date = API_MIN_DATE
            queryset &= combine_date_range_queryset(value, model, min_date, API_MAX_DATE)

        elif key == "award_type_codes":
            queryset &= queryset.filter(Q(type__in=value))

        elif key == "agencies":
            # TODO: Make function to match agencies in award filter throwing dupe error
            funding_toptier = Q()
            funding_subtier = Q()
            awarding_toptier = Q()
            awarding_subtier = Q()
            for v in value:
                type = v["type"]
                tier = v["tier"]
                name = v["name"]
                if type == "funding":
                    if tier == "toptier":
                        funding_toptier |= Q(funding_toptier_agency_name=name)
                    elif tier == "subtier":
                        if 'toptier_name' in v:
                            funding_subtier |= (Q(funding_subtier_agency_name=name) &
                                                Q(funding_toptier_agency_name=v['toptier_name']))
                        else:
                            funding_subtier |= Q(funding_subtier_agency_name=name)

                elif type == "awarding":
                    if tier == "toptier":
                        awarding_toptier |= Q(awarding_toptier_agency_name=name)
                    elif tier == "subtier":
                        if 'toptier_name' in v:
                            awarding_subtier |= (Q(awarding_subtier_agency_name=name) &
                                                 Q(awarding_toptier_agency_name=v['toptier_name']))
                        else:
                            awarding_subtier |= Q(awarding_subtier_agency_name=name)

            awarding_queryfilter = Q()
            funding_queryfilter = Q()

            # Since these are Q filters, no DB hits for boolean checks
            if funding_toptier:
                funding_queryfilter |= funding_toptier
            if funding_subtier:
                funding_queryfilter |= funding_subtier
            if awarding_toptier:
                awarding_queryfilter |= awarding_toptier
            if awarding_subtier:
                awarding_queryfilter |= awarding_subtier

            queryset = queryset.filter(funding_queryfilter & awarding_queryfilter)

        elif key == "legal_entities":
            # This filter key has effectively become obsolete by recipient_search_text
            msg = 'API request included "{}" key. No filtering will occur with provided value "{}"'
            logger.info(msg.format(key, value))
            # in_query = [v for v in value]
            # if len(in_query) != 0:
            #     queryset &= model.objects.filter(recipient_id__in=in_query)

        elif key == "recipient_search_text":
            all_filters_obj = Q()
            for recip in value:
                upper_recipient_string = str(recip).upper()
                # recipient_name_ts_vector is a postgres TS_Vector
                filter_obj = Q(recipient_name_ts_vector=upper_recipient_string)
                if len(upper_recipient_string) == 9 and upper_recipient_string[:5].isnumeric():
                    filter_obj |= Q(recipient_unique_id=upper_recipient_string)
                all_filters_obj |= filter_obj
            queryset &= queryset.filter(all_filters_obj)

        elif key == "recipient_id":
            filter_obj = Q()
            recipient_hash = value[:-2]

            if value.endswith('P'):  # For parent types, gather all of the children's transactions
                parent_duns_rows = (
                    RecipientProfile.objects.filter(recipient_hash=recipient_hash, recipient_level='P')
                    .values('recipient_unique_id')
                )
                if len(parent_duns_rows) == 1:
                    parent_duns = parent_duns_rows[0]['recipient_unique_id']
                    filter_obj = Q(parent_recipient_unique_id=parent_duns)
                elif len(parent_duns_rows) > 2:
                    # shouldn't occur
                    raise InvalidParameterException('Non-unique parent record found in RecipientProfile')
            elif value.endswith('C'):
                filter_obj = Q(recipient_hash=recipient_hash, parent_recipient_unique_id__isnull=False)
            else:
                # "R" recipient level
                filter_obj = Q(recipient_hash=recipient_hash, parent_recipient_unique_id__isnull=True)
            queryset &= queryset.filter(filter_obj)

        elif key == "recipient_scope":
            if value == "domestic":
                queryset = queryset.filter(recipient_scope_q)
            elif value == "foreign":
                queryset = queryset.exclude(recipient_scope_q)
            else:
                raise InvalidParameterException('Invalid filter: recipient_scope type is invalid.')

        elif key == "recipient_locations":
            queryset = queryset.filter(geocode_filter_locations('recipient_location', value, True))

        elif key == "recipient_type_names":
            if len(value) != 0:
                queryset &= queryset.filter(Q(business_categories__overlap=value))

        elif key == "place_of_performance_scope":
            if value == "domestic":
                queryset = queryset.filter(pop_scope_q)
            elif value == "foreign":
                queryset = queryset.exclude(pop_scope_q)
            else:
                raise InvalidParameterException('Invalid filter: place_of_performance_scope is invalid.')

        elif key == "place_of_performance_locations":
            queryset = queryset.filter(geocode_filter_locations('pop', value, True))

        elif key == "award_amounts":
            queryset &= total_obligation_queryset(value, model, filters)

        elif key == "award_ids":
            filter_obj = Q()
            for val in value:
                # award_id_string is a Postgres TS_vector
                # award_id_string = piid + fain + uri
                filter_obj |= Q(award_ts_vector=val)
            queryset &= queryset.filter(filter_obj)

        elif key == "program_numbers":
            in_query = [v for v in value]
            if len(in_query) != 0:
                queryset &= queryset.filter(Q(cfda_number__in=in_query))

        elif key == "naics_codes":
            in_query = [v for v in value]
            if len(in_query) != 0:
                queryset &= queryset.filter(Q(naics_code__in=in_query))

        elif key == "psc_codes":
            in_query = [v for v in value]
            if len(in_query) != 0:
                queryset &= queryset.filter(Q(product_or_service_code__in=in_query))

        elif key == "contract_pricing_type_codes":
            in_query = [v for v in value]
            if len(in_query) != 0:
                queryset &= queryset.filter(Q(type_of_contract_pricing__in=in_query))

        elif key == "set_aside_type_codes":
            or_queryset = Q()
            for v in value:
                or_queryset |= Q(type_set_aside__exact=v)
            queryset &= queryset.filter(or_queryset)

        elif key == "extent_competed_type_codes":
            or_queryset = Q()
            for v in value:
                or_queryset |= Q(extent_competed__exact=v)
            queryset &= queryset.filter(or_queryset)

        # Federal Account Filter
        elif key == "federal_account_ids":
            faba_flag = True
            or_queryset = Q()
            for v in value:
                or_queryset |= Q(treasury_account__federal_account_id=v)
            faba_queryset = faba_queryset.filter(or_queryset)

        # Federal Account Filter
        elif key == "object_class":
            faba_flag = True
            result = Q()
            for oc in value:
                subresult = Q()
                for (key, values) in oc.items():
                    subresult &= filter_on("treasury_account__program_balances__object_class", key, values)
                result |= subresult
            faba_queryset = faba_queryset.filter(result)

        # Federal Account Filter
        elif key == "program_activity":
            faba_flag = True
            or_queryset = Q()
            for v in value:
                or_queryset |= Q(treasury_account__program_balances__program_activity__program_activity_code=v)
            faba_queryset = faba_queryset.filter(or_queryset)

    if faba_flag:
        award_ids = faba_queryset.values('award_id')
        queryset &= queryset.filter(Q(award_id__in=award_ids))

    return queryset