def test_get_download_ids(monkeypatch, transaction_type_data, elasticsearch_transaction_index): setup_elasticsearch_test(monkeypatch, elasticsearch_transaction_index) results = get_download_ids(["pop tart"], "transaction_id") transaction_ids = list(itertools.chain.from_iterable(results)) expected_results = [1, 2, 3, 4, 5, 6] assert transaction_ids == expected_results
def matview_search_filter(filters, model, for_downloads=False): queryset = model.objects.all() recipient_scope_q = Q(recipient_location_country_code="USA") | Q( recipient_location_country_name="UNITED STATES") pop_scope_q = Q(pop_country_code="USA") | Q( pop_country_name="UNITED STATES") faba_flag = False faba_queryset = FinancialAccountsByAwards.objects.filter( award__isnull=False) for key, value in filters.items(): if value is None: raise InvalidParameterException('Invalid filter: ' + key + ' has null as its value.') key_list = [ 'keywords', 'elasticsearch_keyword', 'time_period', 'award_type_codes', 'agencies', 'legal_entities', 'recipient_id', 'recipient_search_text', 'recipient_scope', 'recipient_locations', 'recipient_type_names', 'place_of_performance_scope', 'place_of_performance_locations', 'award_amounts', 'award_ids', 'program_numbers', 'naics_codes', 'psc_codes', 'contract_pricing_type_codes', 'set_aside_type_codes', 'extent_competed_type_codes', # next 3 keys used by federal account page 'federal_account_ids', 'object_class', 'program_activity' ] if key not in key_list: raise InvalidParameterException('Invalid filter: ' + key + ' does not exist.') if key == "keywords": def keyword_parse(keyword): # keyword_ts_vector & award_ts_vector are Postgres TS_vectors. # keyword_ts_vector = recipient_name + naics_code + naics_description # + psc_description + awards_description # award_ts_vector = piid + fain + uri filter_obj = Q(keyword_ts_vector=keyword) | Q( award_ts_vector=keyword) if keyword.isnumeric(): filter_obj |= Q(naics_code__contains=keyword) if len(keyword) == 4 and PSC.objects.all().filter( code__iexact=keyword).exists(): filter_obj |= Q(product_or_service_code__iexact=keyword) return filter_obj filter_obj = Q() for keyword in value: filter_obj |= keyword_parse(keyword) potential_duns = list( filter((lambda x: len(x) > 7 and len(x) < 10), value)) if len(potential_duns) > 0: filter_obj |= Q(recipient_unique_id__in=potential_duns) | \ Q(parent_recipient_unique_id__in=potential_duns) queryset = queryset.filter(filter_obj) elif key == "elasticsearch_keyword": keyword = " ".join(value) if isinstance(value, list) else value transaction_ids = elasticsearch_helper.get_download_ids( keyword=keyword, field='transaction_id') # flatten IDs transaction_ids = list( itertools.chain.from_iterable(transaction_ids)) logger.info('Found {} transactions based on keyword: {}'.format( len(transaction_ids), keyword)) transaction_ids = [ str(transaction_id) for transaction_id in transaction_ids ] queryset = queryset.extra(where=[ '"transaction_normalized"."id" = ANY(\'{{{}}}\'::int[])'. format(','.join(transaction_ids)) ]) elif key == "time_period": min_date = API_SEARCH_MIN_DATE if for_downloads: min_date = API_MIN_DATE queryset &= combine_date_range_queryset(value, model, min_date, API_MAX_DATE) elif key == "award_type_codes": queryset = queryset.filter(type__in=value) elif key == "agencies": # TODO: Make function to match agencies in award filter throwing dupe error funding_toptier = Q() funding_subtier = Q() awarding_toptier = Q() awarding_subtier = Q() for v in value: type = v["type"] tier = v["tier"] name = v["name"] if type == "funding": if tier == "toptier": funding_toptier |= Q(funding_toptier_agency_name=name) elif tier == "subtier": if 'toptier_name' in v: funding_subtier |= ( Q(funding_subtier_agency_name=name) & Q(funding_toptier_agency_name=v['toptier_name'] )) else: funding_subtier |= Q( funding_subtier_agency_name=name) elif type == "awarding": if tier == "toptier": awarding_toptier |= Q( awarding_toptier_agency_name=name) elif tier == "subtier": if 'toptier_name' in v: awarding_subtier |= (Q( awarding_subtier_agency_name=name ) & Q( awarding_toptier_agency_name=v['toptier_name']) ) else: awarding_subtier |= Q( awarding_subtier_agency_name=name) awarding_queryfilter = Q() funding_queryfilter = Q() # Since these are Q filters, no DB hits for boolean checks if funding_toptier: funding_queryfilter |= funding_toptier if funding_subtier: funding_queryfilter |= funding_subtier if awarding_toptier: awarding_queryfilter |= awarding_toptier if awarding_subtier: awarding_queryfilter |= awarding_subtier queryset = queryset.filter(funding_queryfilter & awarding_queryfilter) elif key == "legal_entities": # This filter key has effectively become obsolete by recipient_search_text msg = 'API request included "{}" key. No filtering will occur with provided value "{}"' logger.info(msg.format(key, value)) # in_query = [v for v in value] # if len(in_query) != 0: # queryset &= model.objects.filter(recipient_id__in=in_query) elif key == "recipient_search_text": all_filters_obj = Q() for recip in value: upper_recipient_string = str(recip).upper() # recipient_name_ts_vector is a postgres TS_Vector filter_obj = Q(recipient_name_ts_vector=upper_recipient_string) if len(upper_recipient_string ) == 9 and upper_recipient_string[:5].isnumeric(): filter_obj |= Q(recipient_unique_id=upper_recipient_string) all_filters_obj |= filter_obj queryset = queryset.filter(all_filters_obj) elif key == "recipient_id": filter_obj = Q() recipient_hash = value[:-2] if value.endswith( 'P' ): # For parent types, gather all of the children's transactions parent_duns_rows = (RecipientProfile.objects.filter( recipient_hash=recipient_hash, recipient_level='P').values('recipient_unique_id')) if len(parent_duns_rows) == 1: parent_duns = parent_duns_rows[0]['recipient_unique_id'] filter_obj = Q(parent_recipient_unique_id=parent_duns) elif len(parent_duns_rows) > 2: # shouldn't occur raise InvalidParameterException( 'Non-unique parent record found in RecipientProfile') elif value.endswith('C'): filter_obj = Q(recipient_hash=recipient_hash, parent_recipient_unique_id__isnull=False) else: # "R" recipient level filter_obj = Q(recipient_hash=recipient_hash, parent_recipient_unique_id__isnull=True) queryset = queryset.filter(filter_obj) elif key == "recipient_scope": if value == "domestic": queryset = queryset.filter(recipient_scope_q) elif value == "foreign": queryset = queryset.exclude(recipient_scope_q) else: raise InvalidParameterException( 'Invalid filter: recipient_scope type is invalid.') elif key == "recipient_locations": queryset = queryset.filter( geocode_filter_locations("recipient_location", value, True)) elif key == "recipient_type_names": if len(value) != 0: queryset = queryset.filter(business_categories__overlap=value) elif key == "place_of_performance_scope": if value == "domestic": queryset = queryset.filter(pop_scope_q) elif value == "foreign": queryset = queryset.exclude(pop_scope_q) else: raise InvalidParameterException( 'Invalid filter: place_of_performance_scope is invalid.') elif key == "place_of_performance_locations": queryset = queryset.filter( geocode_filter_locations("pop", value, True)) elif key == "award_amounts": queryset &= total_obligation_queryset(value, model, filters) elif key == "award_ids": filter_obj = Q() for val in value: # award_id_string is a Postgres TS_vector # award_id_string = piid + fain + uri filter_obj |= Q(award_ts_vector=val) queryset = queryset.filter(filter_obj) elif key == "program_numbers": in_query = [v for v in value] if len(in_query) != 0: queryset = queryset.filter(cfda_number__in=in_query) elif key == "naics_codes": in_query = [v for v in value] if len(in_query) != 0: queryset = queryset.filter(naics_code__in=in_query) elif key == "psc_codes": in_query = [v for v in value] if len(in_query) != 0: queryset = queryset.filter( product_or_service_code__in=in_query) elif key == "contract_pricing_type_codes": in_query = [v for v in value] if len(in_query) != 0: queryset = queryset.filter( type_of_contract_pricing__in=in_query) elif key == "set_aside_type_codes": or_queryset = Q() for v in value: or_queryset |= Q(type_set_aside__exact=v) queryset = queryset.filter(or_queryset) elif key == "extent_competed_type_codes": or_queryset = Q() for v in value: or_queryset |= Q(extent_competed__exact=v) queryset = queryset.filter(or_queryset) # Federal Account Filter elif key == "federal_account_ids": faba_flag = True or_queryset = Q() for v in value: or_queryset |= Q(treasury_account__federal_account_id=v) faba_queryset = faba_queryset.filter(or_queryset) # Federal Account Filter elif key == "object_class": faba_flag = True result = Q() for oc in value: subresult = Q() for (key, values) in oc.items(): subresult &= filter_on( "treasury_account__program_balances__object_class", key, values) result |= subresult faba_queryset = faba_queryset.filter(result) # Federal Account Filter elif key == "program_activity": faba_flag = True or_queryset = Q() for v in value: or_queryset |= Q( treasury_account__program_balances__program_activity__program_activity_code =v) faba_queryset = faba_queryset.filter(or_queryset) if faba_flag: award_ids = faba_queryset.values('award_id') queryset = queryset.filter(award_id__in=award_ids) return queryset
def matview_search_filter(filters, model, for_downloads=False): queryset = model.objects.all() recipient_scope_q = Q(recipient_location_country_code="USA") | Q(recipient_location_country_name="UNITED STATES") pop_scope_q = Q(pop_country_code="USA") | Q(pop_country_name="UNITED STATES") faba_flag = False faba_queryset = FinancialAccountsByAwards.objects.filter(award__isnull=False) for key, value in filters.items(): if value is None: raise InvalidParameterException("Invalid filter: " + key + " has null as its value.") key_list = [ "keywords", "elasticsearch_keyword", "time_period", "award_type_codes", "prime_and_sub_award_types", "agencies", "legal_entities", "recipient_id", "recipient_search_text", "recipient_scope", "recipient_locations", "recipient_type_names", "place_of_performance_scope", "place_of_performance_locations", "award_amounts", "award_ids", "program_numbers", "naics_codes", PSCCodes.underscore_name, "contract_pricing_type_codes", "set_aside_type_codes", "extent_competed_type_codes", TasCodes.underscore_name, TreasuryAccounts.underscore_name, # next 3 keys used by federal account page "federal_account_ids", "object_class", "program_activity", ] if key not in key_list: raise InvalidParameterException("Invalid filter: " + key + " does not exist.") if key == "keywords": def keyword_parse(keyword): # keyword_ts_vector & award_ts_vector are Postgres TS_vectors. # keyword_ts_vector = recipient_name + naics_code + naics_description # + psc_description + awards_description # award_ts_vector = piid + fain + uri filter_obj = Q(keyword_ts_vector=keyword) | Q(award_ts_vector=keyword) if keyword.isnumeric(): filter_obj |= Q(naics_code__contains=keyword) if len(keyword) == 4 and PSC.objects.all().filter(code__iexact=keyword).exists(): filter_obj |= Q(product_or_service_code__iexact=keyword) return filter_obj filter_obj = Q() for keyword in value: filter_obj |= keyword_parse(keyword) potential_duns = list(filter((lambda x: len(x) > 7 and len(x) < 10), value)) if len(potential_duns) > 0: filter_obj |= Q(recipient_unique_id__in=potential_duns) | Q( parent_recipient_unique_id__in=potential_duns ) queryset = queryset.filter(filter_obj) elif key == "elasticsearch_keyword": keyword = " ".join(value) if isinstance(value, list) else value transaction_ids = elasticsearch_helper.get_download_ids(keyword=keyword, field="transaction_id") # flatten IDs transaction_ids = list(itertools.chain.from_iterable(transaction_ids)) logger.info("Found {} transactions based on keyword: {}".format(len(transaction_ids), keyword)) transaction_ids = [str(transaction_id) for transaction_id in transaction_ids] queryset = queryset.extra( where=['"transaction_normalized"."id" = ANY(\'{{{}}}\'::int[])'.format(",".join(transaction_ids))] ) elif key == "time_period": min_date = API_SEARCH_MIN_DATE if for_downloads: min_date = API_MIN_DATE queryset &= combine_date_range_queryset(value, model, min_date, API_MAX_DATE) elif key == "award_type_codes": queryset = queryset.filter(type__in=value) elif key == "prime_and_sub_award_types": award_types = value.get("prime_awards") if award_types: queryset = queryset.filter(type__in=award_types) elif key == "agencies": # TODO: Make function to match agencies in award filter throwing dupe error funding_toptier = Q() funding_subtier = Q() awarding_toptier = Q() awarding_subtier = Q() for v in value: type = v["type"] tier = v["tier"] name = v["name"] if type == "funding": if tier == "toptier": funding_toptier |= Q(funding_toptier_agency_name=name) elif tier == "subtier": if "toptier_name" in v: funding_subtier |= Q(funding_subtier_agency_name=name) & Q( funding_toptier_agency_name=v["toptier_name"] ) else: funding_subtier |= Q(funding_subtier_agency_name=name) elif type == "awarding": if tier == "toptier": awarding_toptier |= Q(awarding_toptier_agency_name=name) elif tier == "subtier": if "toptier_name" in v: awarding_subtier |= Q(awarding_subtier_agency_name=name) & Q( awarding_toptier_agency_name=v["toptier_name"] ) else: awarding_subtier |= Q(awarding_subtier_agency_name=name) awarding_queryfilter = Q() funding_queryfilter = Q() # Since these are Q filters, no DB hits for boolean checks if funding_toptier: funding_queryfilter |= funding_toptier if funding_subtier: funding_queryfilter |= funding_subtier if awarding_toptier: awarding_queryfilter |= awarding_toptier if awarding_subtier: awarding_queryfilter |= awarding_subtier queryset = queryset.filter(funding_queryfilter & awarding_queryfilter) elif key == "legal_entities": # This filter key has effectively become obsolete by recipient_search_text msg = 'API request included "{}" key. No filtering will occur with provided value "{}"' logger.info(msg.format(key, value)) # in_query = [v for v in value] # if len(in_query) != 0: # queryset &= model.objects.filter(recipient_id__in=in_query) elif key == "recipient_search_text": all_filters_obj = Q() for recip in value: upper_recipient_string = str(recip).upper() # recipient_name_ts_vector is a postgres TS_Vector filter_obj = Q(recipient_name_ts_vector=upper_recipient_string) if len(upper_recipient_string) == 9 and upper_recipient_string[:5].isnumeric(): filter_obj |= Q(recipient_unique_id=upper_recipient_string) all_filters_obj |= filter_obj queryset = queryset.filter(all_filters_obj) elif key == "recipient_id": filter_obj = Q() recipient_hash = value[:-2] if value.endswith("P"): # For parent types, gather all of the children's transactions parent_duns_rows = RecipientProfile.objects.filter( recipient_hash=recipient_hash, recipient_level="P" ).values("recipient_unique_id") if len(parent_duns_rows) == 1: parent_duns = parent_duns_rows[0]["recipient_unique_id"] filter_obj = Q(parent_recipient_unique_id=parent_duns) elif len(parent_duns_rows) > 2: # shouldn't occur raise InvalidParameterException("Non-unique parent record found in RecipientProfile") elif value.endswith("C"): filter_obj = Q(recipient_hash=recipient_hash, parent_recipient_unique_id__isnull=False) else: # "R" recipient level filter_obj = Q(recipient_hash=recipient_hash, parent_recipient_unique_id__isnull=True) queryset = queryset.filter(filter_obj) elif key == "recipient_scope": if value == "domestic": queryset = queryset.filter(recipient_scope_q) elif value == "foreign": queryset = queryset.exclude(recipient_scope_q) else: raise InvalidParameterException("Invalid filter: recipient_scope type is invalid.") elif key == "recipient_locations": queryset = queryset.filter(geocode_filter_locations("recipient_location", value)) elif key == "recipient_type_names": if len(value) != 0: queryset = queryset.filter(business_categories__overlap=value) elif key == "place_of_performance_scope": if value == "domestic": queryset = queryset.filter(pop_scope_q) elif value == "foreign": queryset = queryset.exclude(pop_scope_q) else: raise InvalidParameterException("Invalid filter: place_of_performance_scope is invalid.") elif key == "place_of_performance_locations": queryset = queryset.filter(geocode_filter_locations("pop", value)) elif key == "award_amounts": queryset &= total_obligation_queryset(value, model, filters) elif key == "award_ids": queryset = build_award_ids_filter(queryset, value, ("piid", "fain", "uri")) elif key == "program_numbers": in_query = [v for v in value] if len(in_query) != 0: queryset = queryset.filter(cfda_number__in=in_query) elif key == "naics_codes": if isinstance(value, list): require = value elif isinstance(value, dict): require = value.get("require") or [] if value.get("exclude"): raise NotImplementedException( "NOT IMPLEMENTED: postgres endpoint does not currently support excluded naics!" ) else: raise InvalidParameterException("naics_codes must be an array or object") if [value for value in require if len(str(value)) not in [2, 4, 6]]: raise InvalidParameterException( "naics code filtering only supported for codes with lengths of 2, 4, and 6" ) regex = f"^({'|'.join([str(elem) for elem in require])}).*" queryset = queryset.filter(naics_code__regex=regex) elif key == PSCCodes.underscore_name: q = PSCCodes.build_tas_codes_filter(value) queryset = queryset.filter(q) if q else queryset elif key == "contract_pricing_type_codes": in_query = [v for v in value] if len(in_query) != 0: queryset = queryset.filter(type_of_contract_pricing__in=in_query) elif key == "set_aside_type_codes": or_queryset = Q() for v in value: or_queryset |= Q(type_set_aside__exact=v) queryset = queryset.filter(or_queryset) elif key == "extent_competed_type_codes": or_queryset = Q() for v in value: or_queryset |= Q(extent_competed__exact=v) queryset = queryset.filter(or_queryset) # Because these two filters OR with each other, we need to know about the presence of both filters to know what to do # This filter was picked arbitrarily to be the one that checks for the other elif key == TasCodes.underscore_name: q = TasCodes.build_tas_codes_filter(queryset, value) if TreasuryAccounts.underscore_name in filters.keys(): q |= TreasuryAccounts.build_tas_codes_filter(queryset, filters[TreasuryAccounts.underscore_name]) queryset = queryset.filter(q) elif key == TreasuryAccounts.underscore_name and TasCodes.underscore_name not in filters.keys(): queryset = queryset.filter(TreasuryAccounts.build_tas_codes_filter(queryset, value)) # Federal Account Filter elif key == "federal_account_ids": faba_flag = True or_queryset = Q() for v in value: or_queryset |= Q(treasury_account__federal_account_id=v) faba_queryset = faba_queryset.filter(or_queryset) # Federal Account Filter elif key == "object_class": result = Q() for oc in value: subresult = Q() subresult &= filter_on("award__financial_set__object_class", "object_class", oc) result |= subresult queryset = queryset.filter(result) # Federal Account Filter elif key == "program_activity": or_queryset = Q() for v in value: or_queryset |= Q(award__financial_set__program_activity__id=v) queryset = queryset.filter(or_queryset) if faba_flag: award_ids = faba_queryset.values("award_id") queryset = queryset.filter(award_id__in=award_ids) return queryset
def subaward_filter(filters, for_downloads=False): queryset = SubawardView.objects.all() recipient_scope_q = Q(recipient_location_country_code="USA") | Q( recipient_location_country_name="UNITED STATES") pop_scope_q = Q(pop_country_code="USA") | Q( pop_country_name="UNITED STATES") for key, value in filters.items(): if value is None: raise InvalidParameterException("Invalid filter: " + key + " has null as its value.") key_list = [ "keywords", "elasticsearch_keyword", "time_period", "award_type_codes", "prime_and_sub_award_types", "agencies", "legal_entities", "recipient_search_text", "recipient_scope", "recipient_locations", "recipient_type_names", "place_of_performance_scope", "place_of_performance_locations", "award_amounts", "award_ids", "program_numbers", "naics_codes", PSCCodes.underscore_name, "contract_pricing_type_codes", "set_aside_type_codes", "extent_competed_type_codes", TasCodes.underscore_name, TreasuryAccounts.underscore_name, ] if key not in key_list: raise InvalidParameterException("Invalid filter: " + key + " does not exist.") if key == "keywords": def keyword_parse(keyword): # keyword_ts_vector & award_ts_vector are Postgres TS_vectors. # keyword_ts_vector = recipient_name + psc_description + subaward_description # award_ts_vector = piid + fain + uri + subaward_number filter_obj = Q(keyword_ts_vector=keyword) | Q( award_ts_vector=keyword) # Commenting out until NAICS is associated with subawards in DAIMS 1.3.1 # if keyword.isnumeric(): # filter_obj |= Q(naics_code__contains=keyword) if len(keyword) == 4 and PSC.objects.filter( code__iexact=keyword).exists(): filter_obj |= Q(product_or_service_code__iexact=keyword) return filter_obj filter_obj = Q() for keyword in value: filter_obj |= keyword_parse(keyword) potential_duns = list( filter((lambda x: len(x) > 7 and len(x) < 10), value)) if len(potential_duns) > 0: filter_obj |= Q(recipient_unique_id__in=potential_duns) | Q( parent_recipient_unique_id__in=potential_duns) queryset = queryset.filter(filter_obj) elif key == "elasticsearch_keyword": keyword = value transaction_ids = elasticsearch_helper.get_download_ids( keyword=keyword, field="transaction_id") # flatten IDs transaction_ids = list( itertools.chain.from_iterable(transaction_ids)) logger.info("Found {} transactions based on keyword: {}".format( len(transaction_ids), keyword)) transaction_ids = [ str(transaction_id) for transaction_id in transaction_ids ] queryset = queryset.filter(latest_transaction_id__isnull=False) # Prepare a SQL snippet to include in the predicate for searching an array of transaction IDs sql_fragment = '"subaward_view"."latest_transaction_id" = ANY(\'{{{}}}\'::int[])' # int[] -> int array type queryset = queryset.extra( where=[sql_fragment.format(",".join(transaction_ids))]) elif key == "time_period": min_date = API_SEARCH_MIN_DATE if for_downloads: min_date = API_MIN_DATE queryset &= combine_date_range_queryset(value, SubawardView, min_date, API_MAX_DATE) elif key == "award_type_codes": queryset = queryset.filter(prime_award_type__in=value) elif key == "prime_and_sub_award_types": award_types = value.get("sub_awards") if award_types: queryset = queryset.filter(award_type__in=award_types) elif key == "agencies": # TODO: Make function to match agencies in award filter throwing dupe error funding_toptier = Q() funding_subtier = Q() awarding_toptier = Q() awarding_subtier = Q() for v in value: type = v["type"] tier = v["tier"] name = v["name"] if type == "funding": if tier == "toptier": funding_toptier |= Q(funding_toptier_agency_name=name) elif tier == "subtier": if "toptier_name" in v: funding_subtier |= Q( funding_subtier_agency_name=name ) & Q( funding_toptier_agency_name=v["toptier_name"]) else: funding_subtier |= Q( funding_subtier_agency_name=name) elif type == "awarding": if tier == "toptier": awarding_toptier |= Q( awarding_toptier_agency_name=name) elif tier == "subtier": if "toptier_name" in v: awarding_subtier |= Q( awarding_subtier_agency_name=name ) & Q( awarding_toptier_agency_name=v["toptier_name"]) else: awarding_subtier |= Q( awarding_subtier_agency_name=name) awarding_queryfilter = Q() funding_queryfilter = Q() # Since these are Q filters, no DB hits for boolean checks if funding_toptier: funding_queryfilter |= funding_toptier if funding_subtier: funding_queryfilter |= funding_subtier if awarding_toptier: awarding_queryfilter |= awarding_toptier if awarding_subtier: awarding_queryfilter |= awarding_subtier queryset = queryset.filter(funding_queryfilter & awarding_queryfilter) elif key == "legal_entities": # This filter key has effectively become obsolete by recipient_search_text msg = 'API request included "{}" key. No filtering will occur with provided value "{}"' logger.info(msg.format(key, value)) elif key == "recipient_search_text": def recip_string_parse(recipient_string): upper_recipient_string = recipient_string.upper() # recipient_name_ts_vector is a postgres TS_Vector filter_obj = Q(recipient_name_ts_vector=upper_recipient_string) if len(upper_recipient_string ) == 9 and upper_recipient_string[:5].isnumeric(): filter_obj |= Q(recipient_unique_id=upper_recipient_string) return filter_obj filter_obj = Q() for recipient in value: filter_obj |= recip_string_parse(recipient) queryset = queryset.filter(filter_obj) elif key == "recipient_scope": if value == "domestic": queryset = queryset.filter(recipient_scope_q) elif value == "foreign": queryset = queryset.exclude(recipient_scope_q) else: raise InvalidParameterException( "Invalid filter: recipient_scope type is invalid.") elif key == "recipient_locations": queryset = queryset.filter( geocode_filter_locations("recipient_location", value)) elif key == "recipient_type_names": if len(value) != 0: queryset = queryset.filter(business_categories__overlap=value) elif key == "place_of_performance_scope": if value == "domestic": queryset = queryset.filter(pop_scope_q) elif value == "foreign": queryset = queryset.exclude(pop_scope_q) else: raise InvalidParameterException( "Invalid filter: place_of_performance_scope is invalid.") elif key == "place_of_performance_locations": queryset = queryset.filter(geocode_filter_locations("pop", value)) elif key == "award_amounts": queryset &= total_obligation_queryset(value, SubawardView, filters) elif key == "award_ids": queryset = build_award_ids_filter(queryset, value, ("piid", "fain")) elif key == PSCCodes.underscore_name: q = PSCCodes.build_tas_codes_filter(value) queryset = queryset.filter(q) if q else queryset # add "naics_codes" (column naics) after NAICS are mapped to subawards elif key in ("program_numbers", "contract_pricing_type_codes"): filter_to_col = { "program_numbers": "cfda_number", "contract_pricing_type_codes": "type_of_contract_pricing", } in_query = [v for v in value] if len(in_query) != 0: queryset &= SubawardView.objects.filter( **{"{}__in".format(filter_to_col[key]): in_query}) elif key in ("set_aside_type_codes", "extent_competed_type_codes"): or_queryset = Q() filter_to_col = { "set_aside_type_codes": "type_set_aside", "extent_competed_type_codes": "extent_competed" } in_query = [v for v in value] for v in value: or_queryset |= Q( **{"{}__exact".format(filter_to_col[key]): in_query}) queryset = queryset.filter(or_queryset) # Because these two filters OR with each other, we need to know about the presense of both filters to know what to do # This filter was picked arbitrarily to be the one that checks for the other elif key == TasCodes.underscore_name: q = TasCodes.build_tas_codes_filter(queryset, value) if TreasuryAccounts.underscore_name in filters.keys(): q |= TreasuryAccounts.build_tas_codes_filter( queryset, filters[TreasuryAccounts.underscore_name]) queryset = queryset.filter(q) elif key == TreasuryAccounts.underscore_name and TasCodes.underscore_name not in filters.keys( ): queryset = queryset.filter( TreasuryAccounts.build_tas_codes_filter(queryset, value)) return queryset
def subaward_filter(filters, for_downloads=False): queryset = SubawardView.objects.all() for key, value in filters.items(): if value is None: raise InvalidParameterException('Invalid filter: ' + key + ' has null as its value.') key_list = [ 'keywords', 'elasticsearch_keyword', 'time_period', 'award_type_codes', 'agencies', 'legal_entities', 'recipient_search_text', 'recipient_scope', 'recipient_locations', 'recipient_type_names', 'place_of_performance_scope', 'place_of_performance_locations', 'award_amounts', 'award_ids', 'program_numbers', 'naics_codes', 'psc_codes', 'contract_pricing_type_codes', 'set_aside_type_codes', 'extent_competed_type_codes' ] if key not in key_list: raise InvalidParameterException('Invalid filter: ' + key + ' does not exist.') if key == "keywords": def keyword_parse(keyword): # keyword_ts_vector & award_ts_vector are Postgres TS_vectors. # keyword_ts_vector = recipient_name + psc_description + subaward_description # award_ts_vector = piid + fain + uri + subaward_number filter_obj = Q(keyword_ts_vector=keyword) | \ Q(award_ts_vector=keyword) if keyword.isnumeric(): filter_obj |= Q(naics_code__contains=keyword) if len(keyword) == 4 and PSC.objects.all().filter( code__iexact=keyword).exists(): filter_obj |= Q(product_or_service_code__iexact=keyword) return filter_obj filter_obj = Q() for keyword in value: filter_obj |= keyword_parse(keyword) potential_duns = list( filter((lambda x: len(x) > 7 and len(x) < 10), value)) if len(potential_duns) > 0: filter_obj |= Q(recipient_unique_id__in=potential_duns) | \ Q(parent_recipient_unique_id__in=potential_duns) queryset = queryset.filter(filter_obj) elif key == "elasticsearch_keyword": keyword = value transaction_ids = elasticsearch_helper.get_download_ids( keyword=keyword, field='transaction_id') # flatten IDs transaction_ids = list( itertools.chain.from_iterable(transaction_ids)) logger.info('Found {} transactions based on keyword: {}'.format( len(transaction_ids), keyword)) transaction_ids = [ str(transaction_id) for transaction_id in transaction_ids ] queryset = queryset.filter(latest_transaction_id__isnull=False) queryset &= queryset.extra(where=[ '"latest_transaction_id" = ANY(\'{{{}}}\'::int[])'.format( ','.join(transaction_ids)) ]) elif key == "time_period": min_date = API_SEARCH_MIN_DATE if for_downloads: min_date = API_MIN_DATE queryset &= combine_date_range_queryset(value, SubawardView, min_date, API_MAX_DATE) elif key == "award_type_codes": idv_flag = all(i in value for i in contract_type_mapping.keys()) if len(value) != 0: filter_obj = Q(prime_award_type__in=value) if idv_flag: filter_obj |= Q(pulled_from='IDV') queryset &= SubawardView.objects.filter(filter_obj) elif key == "agencies": # TODO: Make function to match agencies in award filter throwing dupe error funding_toptier = Q() funding_subtier = Q() awarding_toptier = Q() awarding_subtier = Q() for v in value: type = v["type"] tier = v["tier"] name = v["name"] if type == "funding": if tier == "toptier": funding_toptier |= Q(funding_toptier_agency_name=name) elif tier == "subtier": if 'toptier_name' in v: funding_subtier |= ( Q(funding_subtier_agency_name=name) & Q(funding_toptier_agency_name=v['toptier_name'] )) else: funding_subtier |= Q( funding_subtier_agency_name=name) elif type == "awarding": if tier == "toptier": awarding_toptier |= Q( awarding_toptier_agency_name=name) elif tier == "subtier": if 'toptier_name' in v: awarding_subtier |= (Q( awarding_subtier_agency_name=name ) & Q( awarding_toptier_agency_name=v['toptier_name']) ) else: awarding_subtier |= Q( awarding_subtier_agency_name=name) awarding_queryfilter = Q() funding_queryfilter = Q() # Since these are Q filters, no DB hits for boolean checks if funding_toptier: funding_queryfilter |= funding_toptier if funding_subtier: funding_queryfilter |= funding_subtier if awarding_toptier: awarding_queryfilter |= awarding_toptier if awarding_subtier: awarding_queryfilter |= awarding_subtier queryset = queryset.filter(funding_queryfilter & awarding_queryfilter) elif key == "legal_entities": # This filter key has effectively become obsolete by recipient_search_text msg = 'API request included "{}" key. No filtering will occur with provided value "{}"' logger.info(msg.format(key, value)) elif key == "recipient_search_text": def recip_string_parse(recipient_string): upper_recipient_string = recipient_string.upper() # recipient_name_ts_vector is a postgres TS_Vector filter_obj = Q(recipient_name_ts_vector=upper_recipient_string) if len(upper_recipient_string ) == 9 and upper_recipient_string[:5].isnumeric(): filter_obj |= Q(recipient_unique_id=upper_recipient_string) return filter_obj filter_obj = Q() for recipient in value: filter_obj |= recip_string_parse(recipient) queryset &= SubawardView.objects.filter(filter_obj) elif key == "recipient_scope": if value == "domestic": queryset = queryset.filter( recipient_location_country_name="UNITED STATES") elif value == "foreign": queryset = queryset.exclude( recipient_location_country_name="UNITED STATES") else: raise InvalidParameterException( 'Invalid filter: recipient_scope type is invalid.') elif key == "recipient_locations": or_queryset = geocode_filter_locations('recipient_location', value, SubawardView, True) queryset &= or_queryset elif key == "recipient_type_names": if len(value) != 0: queryset &= SubawardView.objects.filter( business_categories__overlap=value) elif key == "place_of_performance_scope": if value == "domestic": queryset = queryset.filter( Q(pop_country_name="UNITED STATES") | Q(pop_country_code="USA")) elif value == "foreign": queryset = queryset.exclude( Q(pop_country_name="UNITED STATES") | Q(pop_country_code="USA")) else: raise InvalidParameterException( 'Invalid filter: place_of_performance_scope is invalid.') elif key == "place_of_performance_locations": queryset &= geocode_filter_locations('pop', value, SubawardView, True) elif key == "award_amounts": queryset &= total_obligation_queryset(value, SubawardView, filters) elif key == "award_ids": filter_obj = Q() for val in value: # award_id_string is a Postgres TS_vector # award_id_string = piid + fain + uri + subaward_number filter_obj |= Q(award_ts_vector=val) queryset &= SubawardView.objects.filter(filter_obj) # add "naics_codes" (column naics) after NAICS are mapped to subawards elif key in ("program_numbers", "psc_codes", "contract_pricing_type_codes"): filter_to_col = { "program_numbers": "cfda_number", "psc_codes": "product_or_service_code", "contract_pricing_type_codes": "type_of_contract_pricing", } in_query = [v for v in value] if len(in_query) != 0: queryset &= SubawardView.objects.filter( **{'{}__in'.format(filter_to_col[key]): in_query}) elif key in ("set_aside_type_codes", "extent_competed_type_codes"): or_queryset = Q() filter_to_col = { "set_aside_type_codes": "type_set_aside", "extent_competed_type_codes": "extent_competed", } in_query = [v for v in value] for v in value: or_queryset |= Q( **{'{}__exact'.format(filter_to_col[key]): in_query}) queryset = queryset.filter(or_queryset) return queryset
def subaward_filter(filters): queryset = Subaward.objects.filter( award__latest_transaction_id__isnull=False, award__category__isnull=False) for key, value in filters.items(): if value is None: raise InvalidParameterException('Invalid filter: ' + key + ' has null as its value.') key_list = [ 'keyword', 'elasticsearch_keyword', 'time_period', 'award_type_codes', 'agencies', 'legal_entities', 'recipient_search_text', 'recipient_scope', 'recipient_locations', 'recipient_type_names', 'place_of_performance_scope', 'place_of_performance_locations', 'award_amounts', 'award_ids', 'program_numbers', 'naics_codes', 'psc_codes', 'contract_pricing_type_codes', 'set_aside_type_codes', 'extent_competed_type_codes' ] if key not in key_list: raise InvalidParameterException('Invalid filter: ' + key + ' does not exist.') if key == "keyword": keyword = value # alias # description_match = False # description_qs = queryset.filter(description__icontains=keyword) # if description_qs.exists(): # description_match = True recipient_match = False recipient_list = LegalEntity.objects.all().values( 'legal_entity_id').filter(recipient_name__icontains=keyword) if recipient_list.exists(): recipient_match = True recipient_qs = queryset.filter(recipient__in=recipient_list) naics_match = False if keyword.isnumeric(): naics_list = NAICS.objects.all().filter( code__icontains=keyword).values('code') else: naics_list = NAICS.objects.all().filter( description__icontains=keyword).values('code') if naics_list.exists(): naics_match = True naics_qs = queryset.filter( award__latest_transaction__contract_data__naics__in= naics_list) psc_match = False if len(keyword) == 4 and PSC.objects.all().filter( code=keyword).exists(): psc_list = PSC.objects.all().filter( code=keyword).values('code') else: psc_list = PSC.objects.all().filter( description__icontains=keyword).values('code') if psc_list.exists(): psc_match = True psc_qs = queryset.filter( award__latest_transaction__contract_data__product_or_service_code__in =psc_list) duns_match = False non_parent_duns_list = LegalEntity.objects.all().values( 'legal_entity_id').filter(recipient_unique_id=keyword) parent_duns_list = LegalEntity.objects.all().values( 'legal_entity_id').filter(parent_recipient_unique_id=keyword) duns_list = non_parent_duns_list | parent_duns_list if duns_list.exists(): duns_match = True duns_qs = queryset.filter(recipient__in=duns_list) piid_qs = queryset.filter(award__piid=keyword) fain_qs = queryset.filter(award__fain=keyword) # Always filter on fain/piid because fast: queryset = piid_qs queryset |= fain_qs # if description_match: # queryset |= description_qs if recipient_match: queryset |= recipient_qs if naics_match: queryset |= naics_qs if psc_match: queryset |= psc_qs if duns_match: queryset |= duns_qs elif key == "elasticsearch_keyword": keyword = value transaction_ids = elasticsearch_helper.get_download_ids( keyword=keyword, field='transaction_id') # flatten IDs transaction_ids = list( itertools.chain.from_iterable(transaction_ids)) logger.info('Found {} transactions based on keyword: {}'.format( len(transaction_ids), keyword)) transaction_ids = [ str(transaction_id) for transaction_id in transaction_ids ] queryset = queryset.filter( award__latest_transaction__id__isnull=False) queryset &= queryset.extra(where=[ '"transaction_normalized"."id" = ANY(\'{{{}}}\'::int[])'. format(','.join(transaction_ids)) ]) elif key == "time_period": or_queryset = None queryset_init = False for v in value: date_type = v.get("date_type", "action_date") if date_type not in ["action_date", "last_modified_date"]: raise InvalidParameterException( 'Invalid date_type: {}'.format(date_type)) date_field = ( date_type if date_type == 'action_date' else 'award__latest_transaction__{}'.format(date_type)) kwargs = {} if v.get("start_date") is not None: kwargs["{}__gte".format(date_field)] = v.get("start_date") if v.get("end_date") is not None: kwargs["{}__lte".format(date_field)] = v.get("end_date") # (may have to cast to date) (oct 1 to sept 30) if queryset_init: or_queryset |= Subaward.objects.filter(**kwargs) else: queryset_init = True or_queryset = Subaward.objects.filter(**kwargs) if queryset_init: queryset &= or_queryset elif key == "award_type_codes": or_queryset = [] idv_flag = all(i in value for i in ['A', 'B', 'C', 'D']) for v in value: or_queryset.append(v) if len(or_queryset) != 0: filter_obj = Q(award__type__in=or_queryset) if idv_flag: filter_obj |= Q( award__latest_transaction__contract_data__pulled_from= 'IDV') queryset &= Subaward.objects.filter(filter_obj) elif key == "agencies": # TODO: Make function to match agencies in award filter throwing dupe error or_queryset = None funding_toptier = [] funding_subtier = [] awarding_toptier = [] awarding_subtier = [] for v in value: type = v["type"] tier = v["tier"] name = v["name"] if type == "funding": if tier == "toptier": funding_toptier.append(name) elif tier == "subtier": funding_subtier.append(name) else: raise InvalidParameterException( 'Invalid filter: agencies ' + tier + ' tier is invalid.') elif type == "awarding": if tier == "toptier": awarding_toptier.append(name) elif tier == "subtier": awarding_subtier.append(name) else: raise InvalidParameterException( 'Invalid filter: agencies ' + tier + ' tier is invalid.') else: raise InvalidParameterException( 'Invalid filter: agencies ' + type + ' type is invalid.') if len(funding_toptier) != 0: queryset &= Subaward.objects.filter( funding_agency__toptier_agency__name__in=funding_toptier) if len(funding_subtier) != 0: queryset &= Subaward.objects.filter( funding_agency__subtier_agency__name__in=funding_subtier) if len(awarding_toptier) != 0: queryset &= Subaward.objects.filter( awarding_agency__toptier_agency__name__in=awarding_toptier) if len(awarding_subtier) != 0: queryset &= Subaward.objects.filter( awarding_agency__subtier_agency__name__in=awarding_subtier) elif key == "legal_entities": or_queryset = [] for v in value: or_queryset.append(v) if len(or_queryset) != 0: queryset &= Subaward.objects.filter( recipient__legal_entity_id__in=or_queryset) elif key == "recipient_search_text": if len(value) != 1: raise InvalidParameterException( 'Invalid filter: recipient_search_text must have exactly one value.' ) recipient_string = str(value[0]) filter_obj = Q( recipient__recipient_name__icontains=recipient_string) if len(recipient_string) == 9: filter_obj |= Q( recipient__recipient_unique_id__iexact=recipient_string) queryset &= Subaward.objects.filter(filter_obj) elif key == "recipient_scope": if value == "domestic": queryset = queryset.filter( recipient__location__country_name="UNITED STATES") elif value == "foreign": queryset = queryset.exclude( recipient__location__country_name="UNITED STATES") else: raise InvalidParameterException( 'Invalid filter: recipient_scope type is invalid.') elif key == "recipient_locations": or_queryset = geocode_filter_locations('recipient__location', value, 'Subaward') queryset &= or_queryset elif key == "recipient_type_names": if len(value) != 0: queryset &= Subaward.objects.filter( recipient__business_categories__overlap=value) elif key == "place_of_performance_scope": if value == "domestic": queryset = queryset.filter( place_of_performance__country_name="UNITED STATES") elif value == "foreign": queryset = queryset.exclude( place_of_performance__country_name="UNITED STATES") else: raise InvalidParameterException( 'Invalid filter: place_of_performance_scope is invalid.') elif key == "place_of_performance_locations": or_queryset = geocode_filter_locations('place_of_performance', value, 'Subaward') queryset &= or_queryset elif key == "award_amounts": or_queryset = None queryset_init = False for v in value: if v.get("lower_bound") is not None and v.get( "upper_bound") is not None: if queryset_init: or_queryset |= Subaward.objects.filter( amount__gt=v["lower_bound"], amount__lt=v["upper_bound"]) else: queryset_init = True or_queryset = Subaward.objects.filter( amount__gt=v["lower_bound"], amount__lt=v["upper_bound"]) elif v.get("lower_bound") is not None: if queryset_init: or_queryset |= Subaward.objects.filter( amount__gt=v["lower_bound"]) else: queryset_init = True or_queryset = Subaward.objects.filter( amount__gt=v["lower_bound"]) elif v.get("upper_bound") is not None: if queryset_init: or_queryset |= Subaward.objects.filter( amount__lt=v["upper_bound"]) else: queryset_init = True or_queryset = Subaward.objects.filter( amount__lt=v["upper_bound"]) else: raise InvalidParameterException( 'Invalid filter: award amount has incorrect object.') if queryset_init: queryset &= or_queryset elif key == "award_ids": if len(value) != 0: filter_obj = Q() for val in value: filter_obj |= (Q(award__piid__icontains=val) | Q(award__fain__icontains=val) | Q(award__uri__icontains=val)) queryset &= Subaward.objects.filter(filter_obj) elif key == "program_numbers": or_queryset = [] for v in value: or_queryset.append(v) if len(or_queryset) != 0: queryset &= Subaward.objects.filter( award__latest_transaction__assistance_data__cfda_number__in =or_queryset) elif key == "naics_codes": or_queryset = [] for v in value: or_queryset.append(v) if len(or_queryset) != 0: queryset &= Subaward.objects.filter( award__latest_transaction__contract_data__naics__in= or_queryset) elif key == "psc_codes": or_queryset = [] for v in value: or_queryset.append(v) if len(or_queryset) != 0: queryset &= Subaward.objects.filter( award__latest_transaction__contract_data__product_or_service_code__in =or_queryset) elif key == "contract_pricing_type_codes": or_queryset = [] for v in value: or_queryset.append(v) if len(or_queryset) != 0: queryset &= Subaward.objects.filter( award__latest_transaction__contract_data__type_of_contract_pricing__in =or_queryset) elif key == "set_aside_type_codes": or_queryset = [] for v in value: or_queryset.append(v) if len(or_queryset) != 0: queryset &= Subaward.objects.filter( award__latest_transaction__contract_data__type_set_aside__in =or_queryset) elif key == "extent_competed_type_codes": or_queryset = [] for v in value: or_queryset.append(v) if len(or_queryset) != 0: queryset &= Subaward.objects.filter( award__latest_transaction__contract_data__extent_competed__in =or_queryset) return queryset
def matview_search_filter(filters, model): queryset = model.objects.all() faba_flag = False faba_queryset = FinancialAccountsByAwards.objects.filter( award__isnull=False) for key, value in filters.items(): if value is None: raise InvalidParameterException('Invalid filter: ' + key + ' has null as its value.') key_list = [ 'keyword', 'elasticsearch_keyword', 'time_period', 'award_type_codes', 'agencies', 'legal_entities', 'recipient_search_text', 'recipient_scope', 'recipient_locations', 'recipient_type_names', 'place_of_performance_scope', 'place_of_performance_locations', 'award_amounts', 'award_ids', 'program_numbers', 'naics_codes', 'psc_codes', 'contract_pricing_type_codes', 'set_aside_type_codes', 'extent_competed_type_codes', # next 3 keys used by federal account page 'federal_account_ids', 'object_class', 'program_activity' ] if key not in key_list: raise InvalidParameterException('Invalid filter: ' + key + ' does not exist.') if key == "keyword": keyword = value upper_kw = keyword.upper() # keyword_string & award_id_string are Postgres TS_vectors. # keyword_string = recipient_name + naics_code + naics_description + psc_description + awards_description # award_id_string = piid + fain + uri compound_or = Q(keyword_ts_vector=keyword) | \ Q(award_ts_vector=keyword) | \ Q(recipient_unique_id=upper_kw) | \ Q(parent_recipient_unique_id=keyword) if keyword.isnumeric(): compound_or |= Q(naics_code__contains=keyword) if len(keyword) == 4 and PSC.objects.all().filter( code__iexact=keyword).exists(): compound_or |= Q(product_or_service_code__iexact=keyword) queryset = queryset.filter(compound_or) elif key == "elasticsearch_keyword": keyword = value transaction_ids = elasticsearch_helper.get_download_ids( keyword=keyword, field='transaction_id') # flatten IDs transaction_ids = list( itertools.chain.from_iterable(transaction_ids)) logger.info('Found {} transactions based on keyword: {}'.format( len(transaction_ids), keyword)) transaction_ids = [ str(transaction_id) for transaction_id in transaction_ids ] if model is UniversalAwardView: queryset = queryset.filter( latest_transaction__id__isnull=False) queryset &= queryset.extra(where=[ '"transaction_normalized"."id" = ANY(\'{{{}}}\'::int[])'. format(','.join(transaction_ids)) ]) elif key == "time_period": success, or_queryset = date_or_fy_queryset(value, model, "fiscal_year", "action_date") if success: queryset &= or_queryset elif key == "award_type_codes": idv_flag = all(i in value for i in contract_type_mapping.keys()) if len(value) != 0: filter_obj = Q(type__in=value) if idv_flag: filter_obj |= Q(pulled_from='IDV') queryset &= model.objects.filter(filter_obj) elif key == "agencies": # TODO: Make function to match agencies in award filter throwing dupe error funding_toptier = Q() funding_subtier = Q() awarding_toptier = Q() awarding_subtier = Q() for v in value: type = v["type"] tier = v["tier"] name = v["name"] if type == "funding": if tier == "toptier": funding_toptier |= Q(funding_toptier_agency_name=name) elif tier == "subtier": if 'toptier_name' in v: funding_subtier |= ( Q(funding_subtier_agency_name=name) & Q(funding_toptier_agency_name=v['toptier_name'] )) else: funding_subtier |= Q( funding_subtier_agency_name=name) else: raise InvalidParameterException( 'Invalid filter: agencies ' + tier + ' tier is invalid.') elif type == "awarding": if tier == "toptier": awarding_toptier |= Q( awarding_toptier_agency_name=name) elif tier == "subtier": if 'toptier_name' in v: awarding_subtier |= (Q( awarding_subtier_agency_name=name ) & Q( awarding_toptier_agency_name=v['toptier_name']) ) else: awarding_subtier |= Q( awarding_subtier_agency_name=name) else: raise InvalidParameterException( 'Invalid filter: agencies ' + tier + ' tier is invalid.') else: raise InvalidParameterException( 'Invalid filter: agencies ' + type + ' type is invalid.') awarding_queryfilter = Q() funding_queryfilter = Q() # Since these are Q filters, no DB hits for boolean checks if funding_toptier: funding_queryfilter |= funding_toptier if funding_subtier: funding_queryfilter |= funding_subtier if awarding_toptier: awarding_queryfilter |= awarding_toptier if awarding_subtier: awarding_queryfilter |= awarding_subtier queryset = queryset.filter(funding_queryfilter & awarding_queryfilter) elif key == "legal_entities": in_query = [v for v in value] if len(in_query) != 0: queryset &= model.objects.filter(recipient_id__in=in_query) elif key == "recipient_search_text": if len(value) != 1: raise InvalidParameterException( 'Invalid filter: recipient_search_text must have exactly one value.' ) upper_recipient_string = str(value[0]).upper() # recipient_name_ts_vector is a postgres TS_Vector filter_obj = Q(recipient_name_ts_vector=upper_recipient_string) if len(upper_recipient_string ) == 9 and upper_recipient_string[:5].isnumeric(): filter_obj |= Q(recipient_unique_id=upper_recipient_string) queryset &= model.objects.filter(filter_obj) elif key == "recipient_scope": if value == "domestic": queryset = queryset.filter( recipient_location_country_name="UNITED STATES") elif value == "foreign": queryset = queryset.exclude( recipient_location_country_name="UNITED STATES") else: raise InvalidParameterException( 'Invalid filter: recipient_scope type is invalid.') elif key == "recipient_locations": or_queryset = geocode_filter_locations('recipient_location', value, model, True) queryset &= or_queryset elif key == "recipient_type_names": if len(value) != 0: queryset &= model.objects.filter( business_categories__overlap=value) elif key == "place_of_performance_scope": if value == "domestic": queryset = queryset.filter(pop_country_name="UNITED STATES") elif value == "foreign": queryset = queryset.exclude(pop_country_name="UNITED STATES") else: raise InvalidParameterException( 'Invalid filter: place_of_performance_scope is invalid.') elif key == "place_of_performance_locations": queryset &= geocode_filter_locations('pop', value, model, True) elif key == "award_amounts": success, and_queryset = total_obligation_queryset( value, model, filters) if success: queryset &= and_queryset elif key == "award_ids": if len(value) != 0: filter_obj = Q() for val in value: # award_id_string is a Postgres TS_vector # award_id_string = piid + fain + uri filter_obj |= Q(award_ts_vector=val) queryset &= model.objects.filter(filter_obj) elif key == "program_numbers": in_query = [v for v in value] if len(in_query) != 0: queryset &= model.objects.filter(cfda_number__in=in_query) elif key == "naics_codes": in_query = [v for v in value] if len(in_query) != 0: queryset &= model.objects.filter(naics_code__in=in_query) elif key == "psc_codes": in_query = [v for v in value] if len(in_query) != 0: queryset &= model.objects.filter( product_or_service_code__in=in_query) elif key == "contract_pricing_type_codes": in_query = [v for v in value] if len(in_query) != 0: queryset &= model.objects.filter( type_of_contract_pricing__in=in_query) elif key == "set_aside_type_codes": or_queryset = Q() for v in value: or_queryset |= Q(type_set_aside__exact=v) queryset = queryset.filter(or_queryset) elif key == "extent_competed_type_codes": or_queryset = Q() for v in value: or_queryset |= Q(extent_competed__exact=v) queryset = queryset.filter(or_queryset) # Federal Account Filter elif key == "federal_account_ids": faba_flag = True or_queryset = Q() for v in value: or_queryset |= Q(treasury_account__federal_account_id=v) faba_queryset = faba_queryset.filter(or_queryset) # Federal Account Filter elif key == "object_class": faba_flag = True result = Q() for oc in value: subresult = Q() for (key, values) in oc.items(): subresult &= filter_on( "treasury_account__program_balances__object_class", key, values) result |= subresult faba_queryset = faba_queryset.filter(result) # Federal Account Filter elif key == "program_activity": faba_flag = True or_queryset = Q() for v in value: or_queryset |= Q( treasury_account__program_balances__program_activity__program_activity_code =v) faba_queryset = faba_queryset.filter(or_queryset) if faba_flag: award_ids = faba_queryset.values('award_id') queryset = queryset.filter(award_id__in=award_ids) return queryset
def subaward_filter(filters, for_downloads=False): queryset = SubawardView.objects.all() recipient_scope_q = Q(recipient_location_country_code="USA") | Q(recipient_location_country_name="UNITED STATES") pop_scope_q = Q(pop_country_code="USA") | Q(pop_country_name="UNITED STATES") for key, value in filters.items(): if value is None: raise InvalidParameterException('Invalid filter: ' + key + ' has null as its value.') key_list = [ 'keywords', 'elasticsearch_keyword', 'time_period', 'award_type_codes', 'agencies', 'legal_entities', 'recipient_search_text', 'recipient_scope', 'recipient_locations', 'recipient_type_names', 'place_of_performance_scope', 'place_of_performance_locations', 'award_amounts', 'award_ids', 'program_numbers', 'naics_codes', 'psc_codes', 'contract_pricing_type_codes', 'set_aside_type_codes', 'extent_competed_type_codes' ] if key not in key_list: raise InvalidParameterException('Invalid filter: ' + key + ' does not exist.') if key == "keywords": def keyword_parse(keyword): # keyword_ts_vector & award_ts_vector are Postgres TS_vectors. # keyword_ts_vector = recipient_name + psc_description + subaward_description # award_ts_vector = piid + fain + uri + subaward_number filter_obj = Q(keyword_ts_vector=keyword) | \ Q(award_ts_vector=keyword) # Commenting out until NAICS is associated with subawards in DAIMS 1.3.1 # if keyword.isnumeric(): # filter_obj |= Q(naics_code__contains=keyword) if len(keyword) == 4 and PSC.objects.filter(code__iexact=keyword).exists(): filter_obj |= Q(product_or_service_code__iexact=keyword) return filter_obj filter_obj = Q() for keyword in value: filter_obj |= keyword_parse(keyword) potential_duns = list(filter((lambda x: len(x) > 7 and len(x) < 10), value)) if len(potential_duns) > 0: filter_obj |= Q(recipient_unique_id__in=potential_duns) | \ Q(parent_recipient_unique_id__in=potential_duns) queryset = queryset.filter(filter_obj) elif key == "elasticsearch_keyword": keyword = value transaction_ids = elasticsearch_helper.get_download_ids(keyword=keyword, field='transaction_id') # flatten IDs transaction_ids = list(itertools.chain.from_iterable(transaction_ids)) logger.info('Found {} transactions based on keyword: {}'.format(len(transaction_ids), keyword)) transaction_ids = [str(transaction_id) for transaction_id in transaction_ids] queryset = queryset.filter(latest_transaction_id__isnull=False) # Prepare a SQL snippet to include in the predicate for searching an array of transaction IDs sql_fragment = '"subaward_view"."latest_transaction_id" = ANY(\'{{{}}}\'::int[])' # int[] -> int array type queryset &= queryset.extra(where=[sql_fragment.format(','.join(transaction_ids))]) elif key == "time_period": min_date = API_SEARCH_MIN_DATE if for_downloads: min_date = API_MIN_DATE queryset &= combine_date_range_queryset(value, SubawardView, min_date, API_MAX_DATE) elif key == "award_type_codes": queryset &= queryset.filter(Q(prime_award_type__in=value)) elif key == "agencies": # TODO: Make function to match agencies in award filter throwing dupe error funding_toptier = Q() funding_subtier = Q() awarding_toptier = Q() awarding_subtier = Q() for v in value: type = v["type"] tier = v["tier"] name = v["name"] if type == "funding": if tier == "toptier": funding_toptier |= Q(funding_toptier_agency_name=name) elif tier == "subtier": if 'toptier_name' in v: funding_subtier |= (Q(funding_subtier_agency_name=name) & Q(funding_toptier_agency_name=v['toptier_name'])) else: funding_subtier |= Q(funding_subtier_agency_name=name) elif type == "awarding": if tier == "toptier": awarding_toptier |= Q(awarding_toptier_agency_name=name) elif tier == "subtier": if 'toptier_name' in v: awarding_subtier |= (Q(awarding_subtier_agency_name=name) & Q(awarding_toptier_agency_name=v['toptier_name'])) else: awarding_subtier |= Q(awarding_subtier_agency_name=name) awarding_queryfilter = Q() funding_queryfilter = Q() # Since these are Q filters, no DB hits for boolean checks if funding_toptier: funding_queryfilter |= funding_toptier if funding_subtier: funding_queryfilter |= funding_subtier if awarding_toptier: awarding_queryfilter |= awarding_toptier if awarding_subtier: awarding_queryfilter |= awarding_subtier queryset = queryset.filter(funding_queryfilter & awarding_queryfilter) elif key == "legal_entities": # This filter key has effectively become obsolete by recipient_search_text msg = 'API request included "{}" key. No filtering will occur with provided value "{}"' logger.info(msg.format(key, value)) elif key == "recipient_search_text": def recip_string_parse(recipient_string): upper_recipient_string = recipient_string.upper() # recipient_name_ts_vector is a postgres TS_Vector filter_obj = Q(recipient_name_ts_vector=upper_recipient_string) if len(upper_recipient_string) == 9 and upper_recipient_string[:5].isnumeric(): filter_obj |= Q(recipient_unique_id=upper_recipient_string) return filter_obj filter_obj = Q() for recipient in value: filter_obj |= recip_string_parse(recipient) queryset &= queryset.filter(filter_obj) elif key == "recipient_scope": if value == "domestic": queryset = queryset.filter(recipient_scope_q) elif value == "foreign": queryset = queryset.exclude(recipient_scope_q) else: raise InvalidParameterException('Invalid filter: recipient_scope type is invalid.') elif key == "recipient_locations": queryset = queryset.filter(geocode_filter_locations('recipient_location', value, True)) elif key == "recipient_type_names": if len(value) != 0: queryset &= queryset.filter(Q(business_categories__overlap=value)) elif key == "place_of_performance_scope": if value == "domestic": queryset = queryset.filter(pop_scope_q) elif value == "foreign": queryset = queryset.exclude(pop_scope_q) else: raise InvalidParameterException('Invalid filter: place_of_performance_scope is invalid.') elif key == "place_of_performance_locations": queryset = queryset.filter(geocode_filter_locations('pop', value, True)) elif key == "award_amounts": queryset &= total_obligation_queryset(value, SubawardView, filters) elif key == "award_ids": filter_obj = Q() for val in value: # award_id_string is a Postgres TS_vector # award_id_string = piid + fain + uri + subaward_number filter_obj |= Q(award_ts_vector=val) queryset &= queryset.filter(filter_obj) # add "naics_codes" (column naics) after NAICS are mapped to subawards elif key in ("program_numbers", "psc_codes", "contract_pricing_type_codes"): filter_to_col = { "program_numbers": "cfda_number", "psc_codes": "product_or_service_code", "contract_pricing_type_codes": "type_of_contract_pricing", } in_query = [v for v in value] if len(in_query) != 0: queryset &= SubawardView.objects.filter(**{'{}__in'.format(filter_to_col[key]): in_query}) elif key in ("set_aside_type_codes", "extent_competed_type_codes"): or_queryset = Q() filter_to_col = { "set_aside_type_codes": "type_set_aside", "extent_competed_type_codes": "extent_competed", } in_query = [v for v in value] for v in value: or_queryset |= Q(**{'{}__exact'.format(filter_to_col[key]): in_query}) queryset = queryset.filter(or_queryset) return queryset
def matview_search_filter(filters, model, for_downloads=False): queryset = model.objects.all() recipient_scope_q = Q(recipient_location_country_code="USA") | Q(recipient_location_country_name="UNITED STATES") pop_scope_q = Q(pop_country_code="USA") | Q(pop_country_name="UNITED STATES") faba_flag = False faba_queryset = FinancialAccountsByAwards.objects.filter(award__isnull=False) for key, value in filters.items(): if value is None: raise InvalidParameterException('Invalid filter: ' + key + ' has null as its value.') key_list = [ 'keywords', 'elasticsearch_keyword', 'time_period', 'award_type_codes', 'agencies', 'legal_entities', 'recipient_id', 'recipient_search_text', 'recipient_scope', 'recipient_locations', 'recipient_type_names', 'place_of_performance_scope', 'place_of_performance_locations', 'award_amounts', 'award_ids', 'program_numbers', 'naics_codes', 'psc_codes', 'contract_pricing_type_codes', 'set_aside_type_codes', 'extent_competed_type_codes', # next 3 keys used by federal account page 'federal_account_ids', 'object_class', 'program_activity' ] if key not in key_list: raise InvalidParameterException('Invalid filter: ' + key + ' does not exist.') if key == "keywords": def keyword_parse(keyword): # keyword_ts_vector & award_ts_vector are Postgres TS_vectors. # keyword_ts_vector = recipient_name + naics_code + naics_description # + psc_description + awards_description # award_ts_vector = piid + fain + uri filter_obj = Q(keyword_ts_vector=keyword) | \ Q(award_ts_vector=keyword) if keyword.isnumeric(): filter_obj |= Q(naics_code__contains=keyword) if len(keyword) == 4 and PSC.objects.all().filter(code__iexact=keyword).exists(): filter_obj |= Q(product_or_service_code__iexact=keyword) return filter_obj filter_obj = Q() for keyword in value: filter_obj |= keyword_parse(keyword) potential_duns = list(filter((lambda x: len(x) > 7 and len(x) < 10), value)) if len(potential_duns) > 0: filter_obj |= Q(recipient_unique_id__in=potential_duns) | \ Q(parent_recipient_unique_id__in=potential_duns) queryset = queryset.filter(filter_obj) elif key == "elasticsearch_keyword": keyword = " ".join(value) if isinstance(value, list) else value transaction_ids = elasticsearch_helper.get_download_ids(keyword=keyword, field='transaction_id') # flatten IDs transaction_ids = list(itertools.chain.from_iterable(transaction_ids)) logger.info('Found {} transactions based on keyword: {}'.format(len(transaction_ids), keyword)) transaction_ids = [str(transaction_id) for transaction_id in transaction_ids] queryset &= queryset.extra( where=['"transaction_normalized"."id" = ANY(\'{{{}}}\'::int[])'.format(','.join(transaction_ids))]) elif key == "time_period": min_date = API_SEARCH_MIN_DATE if for_downloads: min_date = API_MIN_DATE queryset &= combine_date_range_queryset(value, model, min_date, API_MAX_DATE) elif key == "award_type_codes": queryset &= queryset.filter(Q(type__in=value)) elif key == "agencies": # TODO: Make function to match agencies in award filter throwing dupe error funding_toptier = Q() funding_subtier = Q() awarding_toptier = Q() awarding_subtier = Q() for v in value: type = v["type"] tier = v["tier"] name = v["name"] if type == "funding": if tier == "toptier": funding_toptier |= Q(funding_toptier_agency_name=name) elif tier == "subtier": if 'toptier_name' in v: funding_subtier |= (Q(funding_subtier_agency_name=name) & Q(funding_toptier_agency_name=v['toptier_name'])) else: funding_subtier |= Q(funding_subtier_agency_name=name) elif type == "awarding": if tier == "toptier": awarding_toptier |= Q(awarding_toptier_agency_name=name) elif tier == "subtier": if 'toptier_name' in v: awarding_subtier |= (Q(awarding_subtier_agency_name=name) & Q(awarding_toptier_agency_name=v['toptier_name'])) else: awarding_subtier |= Q(awarding_subtier_agency_name=name) awarding_queryfilter = Q() funding_queryfilter = Q() # Since these are Q filters, no DB hits for boolean checks if funding_toptier: funding_queryfilter |= funding_toptier if funding_subtier: funding_queryfilter |= funding_subtier if awarding_toptier: awarding_queryfilter |= awarding_toptier if awarding_subtier: awarding_queryfilter |= awarding_subtier queryset = queryset.filter(funding_queryfilter & awarding_queryfilter) elif key == "legal_entities": # This filter key has effectively become obsolete by recipient_search_text msg = 'API request included "{}" key. No filtering will occur with provided value "{}"' logger.info(msg.format(key, value)) # in_query = [v for v in value] # if len(in_query) != 0: # queryset &= model.objects.filter(recipient_id__in=in_query) elif key == "recipient_search_text": all_filters_obj = Q() for recip in value: upper_recipient_string = str(recip).upper() # recipient_name_ts_vector is a postgres TS_Vector filter_obj = Q(recipient_name_ts_vector=upper_recipient_string) if len(upper_recipient_string) == 9 and upper_recipient_string[:5].isnumeric(): filter_obj |= Q(recipient_unique_id=upper_recipient_string) all_filters_obj |= filter_obj queryset &= queryset.filter(all_filters_obj) elif key == "recipient_id": filter_obj = Q() recipient_hash = value[:-2] if value.endswith('P'): # For parent types, gather all of the children's transactions parent_duns_rows = ( RecipientProfile.objects.filter(recipient_hash=recipient_hash, recipient_level='P') .values('recipient_unique_id') ) if len(parent_duns_rows) == 1: parent_duns = parent_duns_rows[0]['recipient_unique_id'] filter_obj = Q(parent_recipient_unique_id=parent_duns) elif len(parent_duns_rows) > 2: # shouldn't occur raise InvalidParameterException('Non-unique parent record found in RecipientProfile') elif value.endswith('C'): filter_obj = Q(recipient_hash=recipient_hash, parent_recipient_unique_id__isnull=False) else: # "R" recipient level filter_obj = Q(recipient_hash=recipient_hash, parent_recipient_unique_id__isnull=True) queryset &= queryset.filter(filter_obj) elif key == "recipient_scope": if value == "domestic": queryset = queryset.filter(recipient_scope_q) elif value == "foreign": queryset = queryset.exclude(recipient_scope_q) else: raise InvalidParameterException('Invalid filter: recipient_scope type is invalid.') elif key == "recipient_locations": queryset = queryset.filter(geocode_filter_locations('recipient_location', value, True)) elif key == "recipient_type_names": if len(value) != 0: queryset &= queryset.filter(Q(business_categories__overlap=value)) elif key == "place_of_performance_scope": if value == "domestic": queryset = queryset.filter(pop_scope_q) elif value == "foreign": queryset = queryset.exclude(pop_scope_q) else: raise InvalidParameterException('Invalid filter: place_of_performance_scope is invalid.') elif key == "place_of_performance_locations": queryset = queryset.filter(geocode_filter_locations('pop', value, True)) elif key == "award_amounts": queryset &= total_obligation_queryset(value, model, filters) elif key == "award_ids": filter_obj = Q() for val in value: # award_id_string is a Postgres TS_vector # award_id_string = piid + fain + uri filter_obj |= Q(award_ts_vector=val) queryset &= queryset.filter(filter_obj) elif key == "program_numbers": in_query = [v for v in value] if len(in_query) != 0: queryset &= queryset.filter(Q(cfda_number__in=in_query)) elif key == "naics_codes": in_query = [v for v in value] if len(in_query) != 0: queryset &= queryset.filter(Q(naics_code__in=in_query)) elif key == "psc_codes": in_query = [v for v in value] if len(in_query) != 0: queryset &= queryset.filter(Q(product_or_service_code__in=in_query)) elif key == "contract_pricing_type_codes": in_query = [v for v in value] if len(in_query) != 0: queryset &= queryset.filter(Q(type_of_contract_pricing__in=in_query)) elif key == "set_aside_type_codes": or_queryset = Q() for v in value: or_queryset |= Q(type_set_aside__exact=v) queryset &= queryset.filter(or_queryset) elif key == "extent_competed_type_codes": or_queryset = Q() for v in value: or_queryset |= Q(extent_competed__exact=v) queryset &= queryset.filter(or_queryset) # Federal Account Filter elif key == "federal_account_ids": faba_flag = True or_queryset = Q() for v in value: or_queryset |= Q(treasury_account__federal_account_id=v) faba_queryset = faba_queryset.filter(or_queryset) # Federal Account Filter elif key == "object_class": faba_flag = True result = Q() for oc in value: subresult = Q() for (key, values) in oc.items(): subresult &= filter_on("treasury_account__program_balances__object_class", key, values) result |= subresult faba_queryset = faba_queryset.filter(result) # Federal Account Filter elif key == "program_activity": faba_flag = True or_queryset = Q() for v in value: or_queryset |= Q(treasury_account__program_balances__program_activity__program_activity_code=v) faba_queryset = faba_queryset.filter(or_queryset) if faba_flag: award_ids = faba_queryset.values('award_id') queryset &= queryset.filter(Q(award_id__in=award_ids)) return queryset