def test_really_old_transaction(client, agency_data):
    """A transaction dated before the API search window must not count toward the subtier agency total."""
    # Push every transaction one fiscal year earlier than the oldest searchable date.
    too_old_fiscal_year = fy(settings.API_SEARCH_MIN_DATE) - 1
    TransactionNormalized.objects.update(fiscal_year=too_old_fiscal_year)

    response = client.get(URL.format(code="001", filter=""))

    assert response.status_code == status.HTTP_200_OK
    assert response.data["toptier_code"] == "001"
    assert response.data["subtier_agency_count"] == 0
def get_subtier_agency_count(self):
    """Count this toptier agency's subtiers that awarded at least one transaction within the searchable window."""
    # Correlated subquery: transactions in a searchable fiscal year awarded by the outer subtier.
    recent_awarding_txns = TransactionNormalized.objects.filter(
        fiscal_year__gte=fy(settings.API_SEARCH_MIN_DATE),
        awarding_agency__subtier_agency=OuterRef("pk"),
    ).values("pk")

    subtiers = SubtierAgency.objects.filter(agency__toptier_agency=self.toptier_agency)
    subtiers = subtiers.annotate(was_an_awarding_agency=Exists(recent_awarding_txns))
    return subtiers.filter(was_an_awarding_agency=True).values("pk").count()
def get_subtier_agency_count(self):
    """Count subtiers of this toptier agency that awarded a transaction in the searchable window.

    Builds a raw EXISTS clause (via ``generate_raw_quoted_query``) instead of the ORM's
    ``Exists`` expression, then injects it with ``.extra(where=[...])``.
    """
    # Only transactions inside the API's searchable date range qualify.
    filters = {"fiscal_year__gte": fy(settings.API_SEARCH_MIN_DATE)}
    values = ["pk", "awarding_agency__subtier_agency"]
    # NOTE(review): the EXISTS subquery is correlated through a raw equality on
    # subtier_agency_id between the Agency and SubtierAgency tables — this assumes
    # both table aliases are visible in the outer query; verify against the
    # generated SQL if this is refactored.
    return (SubtierAgency.objects.filter(
        agency__toptier_agency=self.toptier_agency
    ).extra(where=[
        f"Exists({generate_raw_quoted_query(TransactionNormalized.objects.filter(**filters).values(*values))}"
        f" AND {Agency._meta.db_table}.subtier_agency_id = {SubtierAgency._meta.db_table}.subtier_agency_id)"
    ]).count())
def totals(self):
    """Aggregate this account's balances into per-fiscal-year totals.

    Returns a dict with an "outgoing" section holding outlays, obligations and
    budget authority keyed by fiscal year, and an (empty) "incoming" section.
    """
    accumulators = {
        "outlays": defaultdict(Decimal),
        "obligations": defaultdict(Decimal),
        "budget_authority": defaultdict(Decimal),
    }
    for balance in self.account_balances.all():
        year = fy(balance.reporting_period_start)
        accumulators["budget_authority"][year] += balance.budget_authority_appropriated_amount_cpe
        accumulators["outlays"][year] += balance.gross_outlay_amount_by_tas_cpe
        accumulators["obligations"][year] += balance.obligations_incurred_total_by_tas_cpe
    return {"outgoing": accumulators, "incoming": {}}
def fiscal_year(self):
    """Validated ``fiscal_year`` query parameter, defaulting to the current fiscal year.

    Raises UnprocessableEntityException when the value is not a 4-digit year or
    falls outside [earliest searchable year, current fiscal year].
    """
    raw_year = str(self.request.query_params.get("fiscal_year", current_fiscal_year()))
    if not fullmatch("[0-9]{4}", raw_year):
        raise UnprocessableEntityException("Unrecognized fiscal_year format. Should be YYYY.")

    min_fiscal_year = fy(settings.API_SEARCH_MIN_DATE)
    year = int(raw_year)
    if year < min_fiscal_year:
        raise UnprocessableEntityException(
            f"fiscal_year is currently limited to an earliest year of {min_fiscal_year}."
        )
    if year > current_fiscal_year():
        raise UnprocessableEntityException(
            f"fiscal_year may not exceed current fiscal year of {current_fiscal_year()}."
        )
    return year
def totals_object_class(self):
    """Per-object-class obligation/outlay totals keyed by reporting fiscal year.

    Returns a list of dicts, one per object class, each carrying defaultdicts
    of Decimal totals keyed by fiscal year.
    """
    results = []
    for object_class in self.object_classes:
        obligations = defaultdict(Decimal)
        outlays = defaultdict(Decimal)
        for pb in self.program_balances.filter(object_class=object_class):
            reporting_fiscal_year = fy(pb.submission.reporting_period_start)
            obligations[reporting_fiscal_year] += pb.obligations_incurred_by_program_object_class_cpe
            outlays[reporting_fiscal_year] += pb.gross_outlay_amount_by_program_object_class_cpe
        result = {
            "major_object_class_code": None,
            "major_object_class_name": None,  # TODO: enable once ObjectClass populated
            "object_class": object_class.object_class,  # TODO: remove
            # BUG FIX: the outlays/obligations accumulators were previously
            # swapped here ("outlays" carried obligations and vice versa).
            "outlays": outlays,
            "obligations": obligations,
        }
        results.append(result)
    return results
def totals_program_activity(self):
    """Per-program-activity obligation/outlay totals keyed by reporting fiscal year."""
    results = []
    for pa in self.program_activities:
        obligation_totals = defaultdict(Decimal)
        outlay_totals = defaultdict(Decimal)
        for pb in self.program_balances.filter(program_activity=pa):
            # TODO: once it is present, use the reporting_fiscal_year directly
            year = fy(pb.submission.reporting_period_start)
            obligation_totals[year] += pb.obligations_incurred_by_program_object_class_cpe
            outlay_totals[year] += pb.gross_outlay_amount_by_program_object_class_cpe
        results.append(
            {
                "id": pa.id,
                "program_activity_name": pa.program_activity_name,
                "program_activity_code": pa.program_activity_code,
                "obligations": obligation_totals,
                "outlays": outlay_totals,
            }
        )
    return results
class UnlinkedAwards(AgencyBase):
    """Returns submission history of the specified agency for the specified fiscal year and period"""

    endpoint_doc = "usaspending_api/api_contracts/contracts/v2/reporting/agencies/toptier_code/fiscal_year/fiscal_period/unlinked_awards/type.md"

    # Maps the requested award "type" to the matching ReportingAgencyOverview columns.
    annotation_options = {
        "assistance": {
            "unlinked_file_c_award_count": F("unlinked_assistance_c_awards"),
            "unlinked_file_d_award_count": F("unlinked_assistance_d_awards"),
            "total_linked_award_count": F("linked_assistance_awards"),
        },
        "procurement": {
            "unlinked_file_c_award_count": F("unlinked_procurement_c_awards"),
            "unlinked_file_d_award_count": F("unlinked_procurement_d_awards"),
            "total_linked_award_count": F("linked_procurement_awards"),
        },
    }

    # TinyShield validation rules for the three URL parameters.
    tinyshield_model = [
        {
            "key": "type",
            "name": "type",
            "type": "enum",
            "enum_values": ["assistance", "procurement"],
            "optional": False,
            "default": None,
            "allow_nulls": False,
        },
        {
            "key": "fiscal_year",
            "name": "fiscal_year",
            "type": "integer",
            "min": fy(settings.API_SEARCH_MIN_DATE),
            "max": current_fiscal_year(),
            "optional": False,
            "default": None,
            "allow_nulls": False,
        },
        {
            "key": "fiscal_period",
            "name": "fiscal_period",
            "type": "integer",
            "min": 2,
            "max": 12,
            "optional": False,
            "default": None,
            "allow_nulls": False,
        },
    ]

    @cache_response()
    def get(self, request, toptier_code, fiscal_year, fiscal_period, type):
        """Validate the URL parameters, then return the unlinked-award counts."""
        my_request = {
            "type": type,
            "fiscal_year": fiscal_year,
            "fiscal_period": fiscal_period
        }
        validated = TinyShield(self.tinyshield_model).block(my_request)
        # Pick the annotation set for the requested award type.
        self.annotations = self.annotation_options[validated["type"]]
        self.fiscal_year = validated["fiscal_year"]
        self.fiscal_period = validated["fiscal_period"]
        return Response(self.get_unlinked_awards())

    def get_unlinked_awards(self):
        """Fetch the single overview row for this agency/year/period; fall back to zeroes."""
        result = (ReportingAgencyOverview.objects.filter(
            toptier_code=self.toptier_code,
            fiscal_year=self.fiscal_year,
            fiscal_period=self.fiscal_period).annotate(
                **self.annotations).values(
                    "unlinked_file_c_award_count",
                    "unlinked_file_d_award_count",
                    "total_linked_award_count",
        ).first())
        if not result:
            # No overview row exists for this combination — report zero counts.
            result = {
                "unlinked_file_c_award_count": 0,
                "unlinked_file_d_award_count": 0,
                "total_linked_award_count": 0,
            }
        return result
def save(self, *args, **kwargs):
    """Derive fiscal_year from action_date before persisting the model."""
    self.fiscal_year = fy(self.action_date)
    super().save(*args, **kwargs)
def get_date_signed__fy(self, obj):
    """Serializer field: the fiscal year in which the award was signed."""
    return fy(obj.date_signed)
def test_fy_type_exceptions(not_date):
    """fy() raises TypeError for values that are not dates."""
    with pytest.raises(TypeError):
        fy(not_date)
def test_fy_returns_correct(raw_date, expected_fy):
    """fy() maps each parametrized date to its expected fiscal year."""
    assert fy(raw_date) == expected_fy
def test_fy_returns_integer(raw_date, expected_fy):
    """fy() always returns an int, regardless of the input date type."""
    assert isinstance(fy(raw_date), int)
def insert_new_fabs(to_insert):
    """Insert (or update) FABS transactions from broker rows.

    For each row: creates recipient location/LegalEntity records, a place of
    performance, resolves agencies, gets-or-creates the summary Award, then
    either updates an existing TransactionNormalized/TransactionFABS pair
    (matched on afa_generated_unique) or creates a new one.

    Returns the list of Award ids touched, so callers can recompute award totals.
    """
    # Broker column -> Location field mappings for place of performance.
    place_of_performance_field_map = {
        "location_country_code": "place_of_perform_country_c",
        "country_name": "place_of_perform_country_n",
        "state_code": "place_of_perfor_state_code",
        "state_name": "place_of_perform_state_nam",
        "city_name": "place_of_performance_city",
        "county_name": "place_of_perform_county_na",
        "county_code": "place_of_perform_county_co",
        "foreign_location_description": "place_of_performance_forei",
        "zip_4a": "place_of_performance_zip4a",
        "congressional_code": "place_of_performance_congr",
        "performance_code": "place_of_performance_code",
        "zip_last4": "place_of_perform_zip_last4",
        "zip5": "place_of_performance_zip5",
    }
    # Broker column -> Location field mappings for the recipient's location.
    legal_entity_location_field_map = {
        "location_country_code": "legal_entity_country_code",
        "country_name": "legal_entity_country_name",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_name",
        "city_name": "legal_entity_city_name",
        "city_code": "legal_entity_city_code",
        "county_name": "legal_entity_county_name",
        "county_code": "legal_entity_county_code",
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "foreign_location_description": "legal_entity_foreign_descr",
        "congressional_code": "legal_entity_congressional",
        "zip_last4": "legal_entity_zip_last4",
        "zip5": "legal_entity_zip5",
        "foreign_postal_code": "legal_entity_foreign_posta",
        "foreign_province": "legal_entity_foreign_provi",
        "foreign_city_name": "legal_entity_foreign_city",
    }
    # Broker column -> TransactionNormalized field mappings.
    fabs_normalized_field_map = {
        "type": "assistance_type",
        "description": "award_description",
        "funding_amount": "total_funding_amount",
    }
    # Broker column -> TransactionFABS field mappings (executive compensation).
    fabs_field_map = {
        "officer_1_name": "high_comp_officer1_full_na",
        "officer_1_amount": "high_comp_officer1_amount",
        "officer_2_name": "high_comp_officer2_full_na",
        "officer_2_amount": "high_comp_officer2_amount",
        "officer_3_name": "high_comp_officer3_full_na",
        "officer_3_amount": "high_comp_officer3_amount",
        "officer_4_name": "high_comp_officer4_full_na",
        "officer_4_amount": "high_comp_officer4_amount",
        "officer_5_name": "high_comp_officer5_full_na",
        "officer_5_amount": "high_comp_officer5_amount",
    }
    update_award_ids = []
    for row in to_insert:
        upper_case_dict_values(row)

        # Create new LegalEntityLocation and LegalEntity from the row data
        legal_entity_location = create_location(
            legal_entity_location_field_map, row, {"recipient_flag": True})
        recipient_name = row["awardee_or_recipient_legal"]
        legal_entity = LegalEntity.objects.create(
            recipient_unique_id=row["awardee_or_recipient_uniqu"],
            recipient_name=recipient_name if recipient_name is not None else "",
            parent_recipient_unique_id=row["ultimate_parent_unique_ide"],
        )
        legal_entity_value_map = {
            "location": legal_entity_location,
            "business_categories": get_business_categories(row=row, data_type="fabs"),
            "business_types_description": row["business_types_desc"],
        }
        legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True)

        # Create the place of performance location
        pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True})

        # Find the toptier awards from the subtier awards
        awarding_agency = Agency.get_by_subtier_only(
            row["awarding_sub_tier_agency_c"])
        funding_agency = Agency.get_by_subtier_only(
            row["funding_sub_tier_agency_co"])

        # Create the summary Award
        (created, award) = Award.get_or_create_summary_award(
            generated_unique_award_id=row["unique_award_key"],
            fain=row["fain"],
            uri=row["uri"],
            record_type=row["record_type"],
        )
        award.save()

        # Append row to list of Awards updated
        update_award_ids.append(award.id)

        # modified_at may or may not carry fractional seconds; try both formats.
        try:
            last_mod_date = datetime.strptime(str(row["modified_at"]), "%Y-%m-%d %H:%M:%S.%f").date()
        except ValueError:
            last_mod_date = datetime.strptime(str(row["modified_at"]), "%Y-%m-%d %H:%M:%S").date()

        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": funding_agency,
            "recipient": legal_entity,
            "place_of_performance": pop_location,
            "period_of_performance_start_date": format_date(row["period_of_performance_star"]),
            "period_of_performance_current_end_date": format_date(row["period_of_performance_curr"]),
            "action_date": format_date(row["action_date"]),
            "last_modified_date": last_mod_date,
            "type_description": row["assistance_type_desc"],
            "transaction_unique_id": row["afa_generated_unique"],
        }

        transaction_normalized_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=fabs_normalized_field_map,
            value_map=parent_txn_value_map,
            as_dict=True,
        )

        financial_assistance_data = load_data_into_model(
            TransactionFABS(), row, field_map=fabs_field_map, as_dict=True  # thrown away
        )

        # Hack to cut back on the number of warnings dumped to the log.
        financial_assistance_data["updated_at"] = cast_datetime_to_utc(
            financial_assistance_data["updated_at"])
        financial_assistance_data["created_at"] = cast_datetime_to_utc(
            financial_assistance_data["created_at"])
        financial_assistance_data["modified_at"] = cast_datetime_to_utc(
            financial_assistance_data["modified_at"])

        afa_generated_unique = financial_assistance_data[
            "afa_generated_unique"]
        unique_fabs = TransactionFABS.objects.filter(
            afa_generated_unique=afa_generated_unique)

        if unique_fabs.first():
            # Existing transaction: refresh update_date and recompute fiscal_year.
            transaction_normalized_dict["update_date"] = datetime.now(
                timezone.utc)
            transaction_normalized_dict["fiscal_year"] = fy(
                transaction_normalized_dict["action_date"])

            # Update TransactionNormalized
            TransactionNormalized.objects.filter(
                id=unique_fabs.first().transaction.id).update(
                    **transaction_normalized_dict)

            # Update TransactionFABS
            unique_fabs.update(**financial_assistance_data)
        else:
            # Create TransactionNormalized
            transaction_normalized = TransactionNormalized(
                **transaction_normalized_dict)
            transaction_normalized.save()

            # Create TransactionFABS
            transaction_fabs = TransactionFABS(
                transaction=transaction_normalized, **financial_assistance_data)
            transaction_fabs.save()

        # Update legal entity to map back to transaction
        legal_entity.transaction_unique_id = afa_generated_unique
        legal_entity.save()

    return update_award_ids
def update_transaction_assistance(db_cursor, fiscal_year=None, page=1, limit=500000):
    """Bulk-load a page of broker FABS rows into Locations, LegalEntities, Awards,
    TransactionNormalized and TransactionFABS via several bulk_create passes.

    NOTE(review): fy_begin/fy_end compute ``fiscal_year - 1`` and the log line
    indexes arguments[0..3] unconditionally, so calling this with the default
    fiscal_year=None raises before the query runs — presumably callers always
    pass a fiscal year; confirm before relying on the default.
    """
    # logger.info("Getting IDs for what's currently in the DB...")
    # current_ids = TransactionFABS.objects
    #
    # if fiscal_year:
    #     current_ids = current_ids.filter(action_date__fy=fiscal_year)
    #
    # current_ids = current_ids.values_list('published_award_financial_assistance_id', flat=True)

    query = "SELECT * FROM published_award_financial_assistance"
    arguments = []

    # Fiscal year N runs 10/01/(N-1) through 09/30/N.
    fy_begin = "10/01/" + str(fiscal_year - 1)
    fy_end = "09/30/" + str(fiscal_year)

    if fiscal_year:
        if arguments:
            query += " AND"
        else:
            query += " WHERE"
        query += " action_date::Date BETWEEN %s AND %s"
        arguments += [fy_begin]
        arguments += [fy_end]
    query += " ORDER BY published_award_financial_assistance_id LIMIT %s OFFSET %s"
    arguments += [limit, (page - 1) * limit]

    logger.info("Executing query on Broker DB => " + query % (arguments[0], arguments[1], arguments[2], arguments[3]))

    db_cursor.execute(query, arguments)

    logger.info("Running dictfetchall on db_cursor")
    award_financial_assistance_data = dictfetchall(db_cursor)

    # Broker column -> Location field mappings for the recipient's location.
    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "county_code": "legal_entity_county_code",
        "county_name": "legal_entity_county_name",
        "foreign_city_name": "legal_entity_foreign_city",
        "foreign_postal_code": "legal_entity_foreign_posta",
        "foreign_province": "legal_entity_foreign_provi",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_name",
        "zip5": "legal_entity_zip5",
        "zip_last4": "legal_entity_zip_last4",
        "location_country_code": "legal_entity_country_code",
    }

    # Broker column -> Location field mappings for place of performance.
    place_of_performance_field_map = {
        "city_name": "place_of_performance_city",
        "performance_code": "place_of_performance_code",
        "congressional_code": "place_of_performance_congr",
        "county_name": "place_of_perform_county_na",
        "foreign_location_description": "place_of_performance_forei",
        "state_name": "place_of_perform_state_nam",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c",
    }

    # Broker column -> TransactionNormalized field mappings.
    fabs_normalized_field_map = {
        "type": "assistance_type",
        "description": "award_description",
        "funding_amount": "total_funding_amount",
    }

    # Broker column -> TransactionFABS field mappings (executive compensation).
    fabs_field_map = {
        "officer_1_name": "high_comp_officer1_full_na",
        "officer_1_amount": "high_comp_officer1_amount",
        "officer_2_name": "high_comp_officer2_full_na",
        "officer_2_amount": "high_comp_officer2_amount",
        "officer_3_name": "high_comp_officer3_full_na",
        "officer_3_amount": "high_comp_officer3_amount",
        "officer_4_name": "high_comp_officer4_full_na",
        "officer_4_amount": "high_comp_officer4_amount",
        "officer_5_name": "high_comp_officer5_full_na",
        "officer_5_amount": "high_comp_officer5_amount",
    }

    logger.info("Getting total rows")
    # rows_loaded = len(current_ids)
    total_rows = len(award_financial_assistance_data)  # - rows_loaded

    logger.info("Processing " + str(total_rows) + " rows of assistance data")

    # skip_count = 0

    # ROW ITERATION STARTS HERE

    lel_bulk = []
    pop_bulk = []
    legal_entity_bulk = []
    award_bulk = []

    transaction_assistance_bulk = []
    transaction_normalized_bulk = []

    # Pass 1: recipient (legal entity) locations.
    logger.info(
        "Getting legal entity location objects for {} rows...".format(
            len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):
        # Recipient flag is true for LeL
        legal_entity_location = get_or_create_location(
            legal_entity_location_field_map, row, {"recipient_flag": True}, save=False)
        lel_bulk.append(legal_entity_location)

    logger.info("Bulk creating {} legal entity location rows...".format(
        len(lel_bulk)))
    try:
        Location.objects.bulk_create(lel_bulk)
    except IntegrityError:
        logger.info("!!! DUPLICATES FOUND. Continuing... ")

    # Pass 2: place of performance locations.
    logger.info(
        "Getting place of performance objects for {} rows...".format(
            len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):
        # Place of Performance flag is true for PoP
        pop_location = get_or_create_location(
            place_of_performance_field_map, row, {"place_of_performance_flag": True}, save=False)
        pop_bulk.append(pop_location)

    logger.info("Bulk creating {} place of performance rows...".format(
        len(pop_bulk)))
    try:
        Location.objects.bulk_create(pop_bulk)
    except IntegrityError:
        logger.info("!!! DUPLICATES FOUND. Continuing... ")

    # Pass 3: legal entities, linked positionally to pass-1 locations.
    logger.info("Getting legal entity objects for {} rows...".format(
        len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):
        recipient_name = row.get("awardee_or_recipient_legal", "")
        legal_entity = LegalEntity.objects.filter(
            recipient_unique_id=row["awardee_or_recipient_uniqu"],
            recipient_name=recipient_name).first()
        if legal_entity is None:
            legal_entity = LegalEntity(
                recipient_unique_id=row["awardee_or_recipient_uniqu"],
                recipient_name=recipient_name)
        legal_entity_value_map = {"location": lel_bulk[index - 1]}
        legal_entity = load_data_into_model(
            legal_entity, row, value_map=legal_entity_value_map, save=False)
        legal_entity_bulk.append(legal_entity)

    logger.info("Bulk creating {} legal entity rows...".format(
        len(legal_entity_bulk)))
    try:
        LegalEntity.objects.bulk_create(legal_entity_bulk)
    except IntegrityError:
        logger.info("!!! DUPLICATES FOUND. Continuing... ")

    awarding_agency_list = []
    funding_agency_list = []

    # Pass 4: awards, filling in missing toptier agency codes from subtier maps.
    logger.info("Getting award objects for {} rows...".format(
        len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):

        # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record,
        # use the sub tier code to look it up. This code assumes that all incoming
        # records will supply an awarding subtier agency code
        if row["awarding_agency_code"] is None or len(
                row["awarding_agency_code"].strip()) < 1:
            awarding_subtier_agency_id = subtier_agency_map[
                row["awarding_sub_tier_agency_c"]]
            awarding_toptier_agency_id = subtier_to_agency_map[
                awarding_subtier_agency_id]["toptier_agency_id"]
            awarding_cgac_code = toptier_agency_map[
                awarding_toptier_agency_id]
            row["awarding_agency_code"] = awarding_cgac_code

        # If funding toptier agency code (aka CGAC) is empty, try using the sub
        # tier funding code to look it up. Unlike the awarding agency, we can't
        # assume that the funding agency subtier code will always be present.
        if row["funding_agency_code"] is None or len(
                row["funding_agency_code"].strip()) < 1:
            funding_subtier_agency_id = subtier_agency_map.get(
                row["funding_sub_tier_agency_co"])
            if funding_subtier_agency_id is not None:
                funding_toptier_agency_id = subtier_to_agency_map[
                    funding_subtier_agency_id]["toptier_agency_id"]
                funding_cgac_code = toptier_agency_map[
                    funding_toptier_agency_id]
            else:
                funding_cgac_code = None
            row["funding_agency_code"] = funding_cgac_code

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        awarding_agency = Agency.get_by_toptier_subtier(
            row["awarding_agency_code"], row["awarding_sub_tier_agency_c"])
        funding_agency = Agency.get_by_toptier_subtier(
            row["funding_agency_code"], row["funding_sub_tier_agency_co"])
        awarding_agency_list.append(awarding_agency)
        funding_agency_list.append(funding_agency)

        # award.save() is called in Award.get_or_create_summary_award by default
        created, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            fain=row.get("fain"),
            uri=row.get("uri"),
            generated_unique_award_id=row.get("unique_award_key"),
            save=False,
        )
        award_bulk.append(award)
        award_update_id_list.append(award.id)
        award_assistance_update_id_list.append(award.id)

    logger.info("Bulk creating {} award rows...".format(len(award_bulk)))
    try:
        Award.objects.bulk_create(award_bulk)
    except IntegrityError:
        logger.info("!!! DUPLICATES FOUND. Continuing... ")

    # Pass 5: normalized transactions, joined positionally to earlier passes.
    logger.info("Getting transaction_normalized for {} rows...".format(
        len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):
        parent_txn_value_map = {
            "award": award_bulk[index - 1],
            "awarding_agency": awarding_agency_list[index - 1],
            "funding_agency": funding_agency_list[index - 1],
            "recipient": legal_entity_bulk[index - 1],
            "place_of_performance": pop_bulk[index - 1],
            "period_of_performance_start_date":
            format_date(row["period_of_performance_star"]),
            "period_of_performance_current_end_date":
            format_date(row["period_of_performance_curr"]),
            "action_date":
            format_date(row["action_date"]),
        }

        transaction_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=fabs_normalized_field_map,
            value_map=parent_txn_value_map,
            as_dict=True,
        )

        transaction_normalized = TransactionNormalized.get_or_create_transaction(
            **transaction_dict)
        transaction_normalized.fiscal_year = fy(
            transaction_normalized.action_date)
        transaction_normalized_bulk.append(transaction_normalized)

    logger.info("Bulk creating {} TransactionNormalized rows...".format(
        len(transaction_normalized_bulk)))
    try:
        TransactionNormalized.objects.bulk_create(
            transaction_normalized_bulk)
    except IntegrityError:
        logger.info(
            "Tried and failed to insert duplicate transaction_normalized row. Continuing... "
        )

    # Pass 6: FABS detail rows attached to the pass-5 transactions.
    for index, row in enumerate(award_financial_assistance_data, 1):
        financial_assistance_data = load_data_into_model(
            TransactionFABS(), row, field_map=fabs_field_map, as_dict=True  # thrown away
        )

        transaction_assistance = TransactionFABS(
            transaction=transaction_normalized_bulk[index - 1], **financial_assistance_data)
        transaction_assistance_bulk.append(transaction_assistance)

    logger.info("Bulk creating TransactionFABS rows...")
    try:
        TransactionFABS.objects.bulk_create(transaction_assistance_bulk)
    except IntegrityError:
        logger.info("!!! DUPLICATES FOUND. Continuing... ")
def test_fy_type_exceptions(not_date):
    """fy() should reject values it cannot interpret as dates."""
    # NOTE(review): pytest.raises(Exception) passes for *any* error, so this
    # cannot distinguish a deliberate rejection from an unrelated crash.
    # Consider narrowing to the specific exception type(s) fy raises —
    # TODO confirm against the parametrized not_date values.
    with pytest.raises(Exception):
        fy(not_date)
def insert_new_fpds(self, to_insert, total_rows):
    """Insert (or update) FPDS (procurement) transactions from broker rows.

    For each row: creates recipient location/LegalEntity records, a place of
    performance, resolves agencies, gets-or-creates the summary Award, then
    either updates an existing TransactionNormalized/TransactionFPDS pair
    (matched on detached_award_proc_unique) or creates a new one. Award ids
    touched are appended to the module-level AWARD_UPDATE_ID_LIST.
    """
    # Broker column -> Location field mappings for place of performance.
    place_of_performance_field_map = {
        "location_country_code": "place_of_perform_country_c",
        "country_name": "place_of_perf_country_desc",
        "state_code": "place_of_performance_state",
        "state_name": "place_of_perfor_state_desc",
        "city_name": "place_of_perform_city_name",
        "county_name": "place_of_perform_county_na",
        "county_code": "place_of_perform_county_co",
        "zip_4a": "place_of_performance_zip4a",
        "congressional_code": "place_of_performance_congr",
        "zip_last4": "place_of_perform_zip_last4",
        "zip5": "place_of_performance_zip5",
    }

    # Broker column -> Location field mappings for the recipient's location.
    legal_entity_location_field_map = {
        "location_country_code": "legal_entity_country_code",
        "country_name": "legal_entity_country_name",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_descrip",
        "city_name": "legal_entity_city_name",
        "county_name": "legal_entity_county_name",
        "county_code": "legal_entity_county_code",
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "zip4": "legal_entity_zip4",
        "congressional_code": "legal_entity_congressional",
        "zip_last4": "legal_entity_zip_last4",
        "zip5": "legal_entity_zip5",
    }

    # Broker column -> TransactionNormalized field mappings.
    fpds_normalized_field_map = {"type": "contract_award_type", "description": "award_description"}

    # Broker column -> TransactionFPDS field mappings (executive compensation).
    fpds_field_map = {
        "officer_1_name": "high_comp_officer1_full_na",
        "officer_1_amount": "high_comp_officer1_amount",
        "officer_2_name": "high_comp_officer2_full_na",
        "officer_2_amount": "high_comp_officer2_amount",
        "officer_3_name": "high_comp_officer3_full_na",
        "officer_3_amount": "high_comp_officer3_amount",
        "officer_4_name": "high_comp_officer4_full_na",
        "officer_4_amount": "high_comp_officer4_amount",
        "officer_5_name": "high_comp_officer5_full_na",
        "officer_5_amount": "high_comp_officer5_amount",
    }

    for index, row in enumerate(to_insert, 1):
        upper_case_dict_values(row)

        # Create new LegalEntityLocation and LegalEntity from the row data
        legal_entity_location = create_location(
            legal_entity_location_field_map, row, {"recipient_flag": True, "is_fpds": True}
        )
        recipient_name = row["awardee_or_recipient_legal"]
        legal_entity = LegalEntity.objects.create(
            recipient_unique_id=row["awardee_or_recipient_uniqu"],
            recipient_name=recipient_name if recipient_name is not None else "",
        )
        legal_entity_value_map = {
            "location": legal_entity_location,
            "business_categories": get_business_categories(row=row, data_type="fpds"),
            "is_fpds": True,
        }
        set_legal_entity_boolean_fields(row)
        legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True)

        # Create the place of performance location
        pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True})

        # Find the toptier awards from the subtier awards
        awarding_agency = Agency.get_by_subtier_only(row["awarding_sub_tier_agency_c"])
        funding_agency = Agency.get_by_subtier_only(row["funding_sub_tier_agency_co"])

        # Create the summary Award
        (created, award) = Award.get_or_create_summary_award(
            generated_unique_award_id=row["unique_award_key"], piid=row["piid"]
        )
        award.parent_award_piid = row.get("parent_award_id")
        award.save()

        # Append row to list of Awards updated
        AWARD_UPDATE_ID_LIST.append(award.id)

        # Pick a strptime format by string length; fractional seconds are optional.
        if row["last_modified"] and len(str(row["last_modified"])) == len("YYYY-MM-DD HH:MM:SS"):  # 19 characters
            dt_fmt = "%Y-%m-%d %H:%M:%S"
        else:
            dt_fmt = "%Y-%m-%d %H:%M:%S.%f"  # try using this even if last_modified isn't a valid string

        try:
            last_mod_date = datetime.strptime(str(row["last_modified"]), dt_fmt).date()
        except ValueError:
            # handle odd-string formats and NULLs from the upstream FPDS-NG system
            info_message = "Invalid value '{}' does not match: '{}'".format(row["last_modified"], dt_fmt)
            logger.info(info_message)
            last_mod_date = None

        award_type, award_type_desc = award_types(row)

        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": funding_agency,
            "recipient": legal_entity,
            "place_of_performance": pop_location,
            "period_of_performance_start_date": format_date(row["period_of_performance_star"]),
            "period_of_performance_current_end_date": format_date(row["period_of_performance_curr"]),
            "action_date": format_date(row["action_date"]),
            "last_modified_date": last_mod_date,
            "transaction_unique_id": row["detached_award_proc_unique"],
            "is_fpds": True,
            "type": award_type,
            "type_description": award_type_desc,
        }

        transaction_normalized_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=fpds_normalized_field_map,
            value_map=parent_txn_value_map,
            as_dict=True,
        )

        contract_instance = load_data_into_model(
            # TransactionFPDS() is "thrown" away
            TransactionFPDS(),
            row,
            field_map=fpds_field_map,
            as_dict=True,
        )

        detached_award_proc_unique = contract_instance["detached_award_proc_unique"]
        unique_fpds = TransactionFPDS.objects.filter(detached_award_proc_unique=detached_award_proc_unique)

        if unique_fpds.first():
            # Existing transaction: refresh update_date and recompute fiscal_year.
            transaction_normalized_dict["update_date"] = datetime.now(timezone.utc)
            transaction_normalized_dict["fiscal_year"] = fy(transaction_normalized_dict["action_date"])

            # update TransactionNormalized
            TransactionNormalized.objects.filter(id=unique_fpds.first().transaction.id).update(
                **transaction_normalized_dict
            )

            # update TransactionFPDS
            unique_fpds.update(**contract_instance)
        else:
            # create TransactionNormalized
            transaction = TransactionNormalized(**transaction_normalized_dict)
            transaction.save()

            # create TransactionFPDS
            transaction_fpds = TransactionFPDS(transaction=transaction, **contract_instance)
            transaction_fpds.save()

        # Update legal entity to map back to transaction
        legal_entity.transaction_unique_id = detached_award_proc_unique
        legal_entity.save()
def insert_new_fabs(to_insert):
    """Insert (or update) FABS transactions from broker rows (slim variant).

    Unlike the older loader, this version does not create Location or
    LegalEntity records; it resolves agencies, gets-or-creates the summary
    Award, then updates or creates the TransactionNormalized/TransactionFABS
    pair matched on afa_generated_unique.

    Returns the list of Award ids touched, so callers can recompute award totals.
    """
    # Broker column -> TransactionNormalized field mappings.
    fabs_normalized_field_map = {
        "type": "assistance_type",
        "description": "award_description",
        "funding_amount": "total_funding_amount",
    }

    # Broker column -> TransactionFABS field mappings (executive compensation).
    fabs_field_map = {
        "officer_1_name": "high_comp_officer1_full_na",
        "officer_1_amount": "high_comp_officer1_amount",
        "officer_2_name": "high_comp_officer2_full_na",
        "officer_2_amount": "high_comp_officer2_amount",
        "officer_3_name": "high_comp_officer3_full_na",
        "officer_3_amount": "high_comp_officer3_amount",
        "officer_4_name": "high_comp_officer4_full_na",
        "officer_4_amount": "high_comp_officer4_amount",
        "officer_5_name": "high_comp_officer5_full_na",
        "officer_5_amount": "high_comp_officer5_amount",
    }

    update_award_ids = []
    for row in to_insert:
        upper_case_dict_values(row)

        # Find the toptier awards from the subtier awards
        awarding_agency = Agency.get_by_subtier_only(
            row["awarding_sub_tier_agency_c"])
        funding_agency = Agency.get_by_subtier_only(
            row["funding_sub_tier_agency_co"])

        # Create the summary Award
        (created, award) = Award.get_or_create_summary_award(
            generated_unique_award_id=row["unique_award_key"],
            fain=row["fain"],
            uri=row["uri"],
            record_type=row["record_type"],
        )
        award.save()

        # Append row to list of Awards updated
        update_award_ids.append(award.id)

        # modified_at may or may not carry fractional seconds; try both formats.
        try:
            last_mod_date = datetime.strptime(str(row["modified_at"]), "%Y-%m-%d %H:%M:%S.%f").date()
        except ValueError:
            last_mod_date = datetime.strptime(str(row["modified_at"]), "%Y-%m-%d %H:%M:%S").date()

        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": funding_agency,
            "period_of_performance_start_date": format_date(row["period_of_performance_star"]),
            "period_of_performance_current_end_date": format_date(row["period_of_performance_curr"]),
            "action_date": format_date(row["action_date"]),
            "last_modified_date": last_mod_date,
            "type_description": row["assistance_type_desc"],
            "transaction_unique_id": row["afa_generated_unique"],
            "business_categories": get_business_categories(row=row, data_type="fabs"),
        }

        transaction_normalized_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=fabs_normalized_field_map,
            value_map=parent_txn_value_map,
            as_dict=True,
        )

        financial_assistance_data = load_data_into_model(
            TransactionFABS(), row, field_map=fabs_field_map, as_dict=True  # thrown away
        )

        # Hack to cut back on the number of warnings dumped to the log.
        financial_assistance_data["updated_at"] = cast_datetime_to_utc(
            financial_assistance_data["updated_at"])
        financial_assistance_data["created_at"] = cast_datetime_to_utc(
            financial_assistance_data["created_at"])
        financial_assistance_data["modified_at"] = cast_datetime_to_utc(
            financial_assistance_data["modified_at"])

        afa_generated_unique = financial_assistance_data[
            "afa_generated_unique"]
        unique_fabs = TransactionFABS.objects.filter(
            afa_generated_unique=afa_generated_unique)

        if unique_fabs.first():
            # Existing transaction: refresh update_date and recompute fiscal_year.
            transaction_normalized_dict["update_date"] = datetime.now(
                timezone.utc)
            transaction_normalized_dict["fiscal_year"] = fy(
                transaction_normalized_dict["action_date"])

            # Update TransactionNormalized
            TransactionNormalized.objects.filter(
                id=unique_fabs.first().transaction.id).update(
                    **transaction_normalized_dict)

            # Update TransactionFABS
            unique_fabs.update(**financial_assistance_data)
        else:
            # Create TransactionNormalized
            transaction_normalized = TransactionNormalized(
                **transaction_normalized_dict)
            transaction_normalized.save()

            # Create TransactionFABS
            transaction_fabs = TransactionFABS(
                transaction=transaction_normalized, **financial_assistance_data)
            transaction_fabs.save()

    return update_award_ids
def test_fy_none():
    """fy(None) passes None through rather than raising."""
    assert fy(None) is None
def calculate_fiscal_year(broker_input):
    """Return the fiscal year derived from a broker record's action_date."""
    action_date = broker_input["action_date"]
    return fy(action_date)
def update_transaction_assistance(db_cursor, fiscal_year=None, page=1, limit=500000):
    """Fetch one page of published FABS rows from the Broker DB and bulk-load
    them as Award, TransactionNormalized, and TransactionFABS rows.

    Args:
        db_cursor: open cursor on the Broker database.
        fiscal_year: if given, restrict to action_dates inside that federal
            fiscal year (Oct 1 of the prior calendar year through Sep 30).
        page: 1-based page number used to compute the query OFFSET.
        limit: maximum number of Broker rows fetched per page.

    Side effects: appends touched Award ids to award_update_id_list and
    award_assistance_update_id_list, and reads subtier_agency_map /
    subtier_to_agency_map / toptier_agency_map — all presumably module-level
    globals (not defined in this function; TODO confirm against the module).
    """
    query = "SELECT * FROM published_award_financial_assistance"
    arguments = []

    if fiscal_year:
        # BUGFIX: these boundary strings were previously computed before this
        # guard, so calling with the default fiscal_year=None raised
        # TypeError on `fiscal_year - 1`. Compute them only when needed.
        fy_begin = "10/01/" + str(fiscal_year - 1)
        fy_end = "09/30/" + str(fiscal_year)
        # `arguments` is always empty at this point, so this is always the
        # first (WHERE) clause — the old dead "AND" branch is removed.
        query += " WHERE action_date::Date BETWEEN %s AND %s"
        arguments += [fy_begin, fy_end]

    query += " ORDER BY published_award_financial_assistance_id LIMIT %s OFFSET %s"
    arguments += [limit, (page - 1) * limit]

    # BUGFIX: interpolate however many parameters are actually present; the
    # old log line hard-coded arguments[0..3] and raised IndexError whenever
    # the fiscal_year filter was absent (only 2 parameters in that case).
    logger.info("Executing query on Broker DB => %s", query % tuple(arguments))
    db_cursor.execute(query, arguments)

    logger.info("Running dictfetchall on db_cursor")
    award_financial_assistance_data = dictfetchall(db_cursor)

    # Maps TransactionNormalized field -> broker column name.
    fabs_normalized_field_map = {
        "type": "assistance_type",
        "description": "award_description",
        "funding_amount": "total_funding_amount",
    }
    # Maps TransactionFABS field -> broker column name (highly compensated officers).
    fabs_field_map = {
        "officer_1_name": "high_comp_officer1_full_na",
        "officer_1_amount": "high_comp_officer1_amount",
        "officer_2_name": "high_comp_officer2_full_na",
        "officer_2_amount": "high_comp_officer2_amount",
        "officer_3_name": "high_comp_officer3_full_na",
        "officer_3_amount": "high_comp_officer3_amount",
        "officer_4_name": "high_comp_officer4_full_na",
        "officer_4_amount": "high_comp_officer4_amount",
        "officer_5_name": "high_comp_officer5_full_na",
        "officer_5_amount": "high_comp_officer5_amount",
    }

    logger.info("Getting total rows")
    total_rows = len(award_financial_assistance_data)  # - rows_loaded
    logger.info("Processing " + str(total_rows) + " rows of assistance data")

    # ROW ITERATION STARTS HERE
    award_bulk = []
    transaction_assistance_bulk = []
    transaction_normalized_bulk = []
    awarding_agency_list = []
    funding_agency_list = []

    logger.info("Getting award objects for {} rows...".format(len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):
        # If awarding toptier agency code (aka CGAC) is not supplied on the D2
        # record, use the sub tier code to look it up. This code assumes that
        # all incoming records will supply an awarding subtier agency code.
        if row["awarding_agency_code"] is None or len(row["awarding_agency_code"].strip()) < 1:
            awarding_subtier_agency_id = subtier_agency_map[row["awarding_sub_tier_agency_c"]]
            awarding_toptier_agency_id = subtier_to_agency_map[awarding_subtier_agency_id]["toptier_agency_id"]
            awarding_toptier_code = toptier_agency_map[awarding_toptier_agency_id]
            row["awarding_agency_code"] = awarding_toptier_code

        # If funding toptier agency code (aka CGAC) is empty, try using the sub
        # tier funding code to look it up. Unlike the awarding agency, we can't
        # assume that the funding agency subtier code will always be present.
        if row["funding_agency_code"] is None or len(row["funding_agency_code"].strip()) < 1:
            funding_subtier_agency_id = subtier_agency_map.get(row["funding_sub_tier_agency_co"])
            if funding_subtier_agency_id is not None:
                funding_toptier_agency_id = subtier_to_agency_map[funding_subtier_agency_id]["toptier_agency_id"]
                funding_toptier_code = toptier_agency_map[funding_toptier_agency_id]
            else:
                funding_toptier_code = None
            row["funding_agency_code"] = funding_toptier_code

        # Find the award that this award transaction belongs to. If it doesn't
        # exist, create it.
        awarding_agency = Agency.get_by_toptier_subtier(
            row["awarding_agency_code"], row["awarding_sub_tier_agency_c"]
        )
        funding_agency = Agency.get_by_toptier_subtier(
            row["funding_agency_code"], row["funding_sub_tier_agency_co"]
        )
        awarding_agency_list.append(awarding_agency)
        funding_agency_list.append(funding_agency)

        # award.save() is called in Award.get_or_create_summary_award by default
        created, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            fain=row.get("fain"),
            uri=row.get("uri"),
            generated_unique_award_id=row.get("unique_award_key"),
            save=False,
        )
        award_bulk.append(award)
        award_update_id_list.append(award.id)
        award_assistance_update_id_list.append(award.id)

    logger.info("Bulk creating {} award rows...".format(len(award_bulk)))
    try:
        Award.objects.bulk_create(award_bulk)
    except IntegrityError:
        # Best-effort load: duplicates are expected and tolerated.
        logger.info("!!! DUPLICATES FOUND. Continuing... ")

    logger.info("Getting transaction_normalized for {} rows...".format(len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):
        # award_bulk / agency lists are parallel to the row list; index is
        # 1-based from enumerate, hence the index - 1.
        parent_txn_value_map = {
            "award": award_bulk[index - 1],
            "awarding_agency": awarding_agency_list[index - 1],
            "funding_agency": funding_agency_list[index - 1],
            "period_of_performance_start_date": format_date(row["period_of_performance_star"]),
            "period_of_performance_current_end_date": format_date(row["period_of_performance_curr"]),
            "action_date": format_date(row["action_date"]),
        }

        transaction_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=fabs_normalized_field_map,
            value_map=parent_txn_value_map,
            as_dict=True,
        )

        transaction_normalized = TransactionNormalized.get_or_create_transaction(**transaction_dict)
        transaction_normalized.fiscal_year = fy(transaction_normalized.action_date)
        transaction_normalized_bulk.append(transaction_normalized)

    logger.info("Bulk creating {} TransactionNormalized rows...".format(len(transaction_normalized_bulk)))
    try:
        TransactionNormalized.objects.bulk_create(transaction_normalized_bulk)
    except IntegrityError:
        logger.info("Tried and failed to insert duplicate transaction_normalized row. Continuing... ")

    for index, row in enumerate(award_financial_assistance_data, 1):
        financial_assistance_data = load_data_into_model(
            TransactionFABS(), row, field_map=fabs_field_map, as_dict=True  # thrown away
        )

        transaction_assistance = TransactionFABS(
            transaction=transaction_normalized_bulk[index - 1], **financial_assistance_data
        )
        transaction_assistance_bulk.append(transaction_assistance)

    logger.info("Bulk creating TransactionFABS rows...")
    try:
        TransactionFABS.objects.bulk_create(transaction_assistance_bulk)
    except IntegrityError:
        logger.info("!!! DUPLICATES FOUND. Continuing... ")