def load_fpds_incrementally(self, date: Optional[datetime], chunk_size: int = CHUNK_SIZE) -> None:
    """Process incremental loads based on a date range or full data loads.

    When `date` is provided, transactions deleted from the broker since that
    date are removed first and the awards they touched are refreshed; when it
    is None all transactions are (re)loaded and deletes are skipped. Ids to
    load are then streamed from the date query in `chunk_size` batches so
    memory stays bounded regardless of total row count.

    Args:
        date: lower bound for the incremental load, or None for a full reload.
        chunk_size: number of transaction ids fetched and loaded per batch.
    """
    if date is None:
        logger.info("Skipping deletes. Fetching all fpds transactions...")
    else:
        logger.info(f"Handling fpds transactions since {date}...")
        detached_award_procurement_ids = retrieve_deleted_fpds_transactions(start_datetime=date)
        stale_awards = delete_stale_fpds(detached_award_procurement_ids)
        # Congressional-district linkage is skipped here; the caller performs
        # a single linkage pass once the whole load finishes.
        self.update_award_records(awards=stale_awards, skip_cd_linkage=True)

    with psycopg2.connect(dsn=get_database_dsn_string()) as connection:
        logger.info("Fetching records to update")
        # Third positional arg presumably switches the query to a count
        # (TODO confirm against get_cursor_for_date_query); the result is a
        # single row, so fetchone()[0] reads it without materializing a list
        # the way fetchall()[0][0] did.
        total_records = self.get_cursor_for_date_query(connection, date, True).fetchone()[0]
        records_processed = 0
        logger.info(f"{total_records} total records to update")

        cursor = self.get_cursor_for_date_query(connection, date)
        while True:
            id_list = cursor.fetchmany(chunk_size)
            if not id_list:
                break
            logger.info(f"Loading batch (size: {len(id_list)}) from date query...")
            self.modified_award_ids.extend(load_fpds_transactions([row[0] for row in id_list]))
            records_processed += len(id_list)
            logger.info(f"{records_processed} out of {total_records} processed")
def load_fpds_from_file(self, file_path: str) -> None:
    """Loads arbitrary set of ids, WITHOUT checking for deletes

    Each line of the file is expected to contain a transaction id; the first
    run of digits on the line is parsed as the id. Loaded award ids are
    accumulated on self.modified_award_ids for later award updates.
    """
    total_count = 0
    with RetrieveFileFromUri(file_path).get_file_object() as fh:
        logger.info(f"Loading transactions from IDs in {file_path}")
        for batch in self.gen_read_file_for_ids(fh):
            ids = [int(re.search(r"\d+", line).group()) for line in batch]
            total_count += len(ids)
            logger.info(f"Loading next batch (size: {len(ids)}, ids {ids[0]}-{ids[-1]})...")
            self.modified_award_ids.extend(load_fpds_transactions(ids))
    logger.info(f"Total transaction IDs in file: {total_count}")
def delete_and_add_fpds_transaction_records():
    """Apply staged FPDS deletes and inserts, then refresh the affected awards."""
    from usaspending_api.broker.management.commands.load_fpds_transactions import Command as FPDSCommand
    from usaspending_api.etl.transaction_loaders.fpds_loader import delete_stale_fpds

    with Timer("Insert/delete FPDS transactions"):
        ids_to_delete = get_ids(TEMP_TRANSACTION_FPDS_DELETE_IDS_TABLE)
        ids_to_add = get_ids(TEMP_TRANSACTION_FPDS_ADD_IDS_TABLE)
        if not (ids_to_delete or ids_to_add):
            logger.info("No FPDS transaction records to add or delete")
            return

        # delete_stale_fpds expects the ids keyed by a date string.
        keyed_deletes = {date.today().strftime("%Y-%m-%d"): ids_to_delete}
        touched_awards = delete_stale_fpds(keyed_deletes)
        touched_awards.extend(load_fpds_transactions(ids_to_add))
        FPDSCommand().update_award_records(awards=touched_awards, skip_cd_linkage=False)
def handle(self, *args, **options):
    """Entry point for the management command.

    Dispatches to exactly one load strategy based on mutually exclusive
    options (reload_all / date / ids / file / since_last_load), then updates
    the award records touched by the load. Exits with status 1 if any
    detached_award_procurement_ids failed to load. The stored FPDS last-load
    date is only advanced for full/since-last-load runs, and only on success.
    """
    # Record script execution start time to update the FPDS last updated date in DB as appropriate
    update_time = datetime.now(timezone.utc)

    if options["reload_all"]:
        self.load_fpds_incrementally(None)
    elif options["date"]:
        self.load_fpds_incrementally(options["date"])
    elif options["ids"]:
        self.modified_award_ids.extend(load_fpds_transactions(options["ids"]))
    elif options["file"]:
        self.load_fpds_from_file(options["file"])
    elif options["since_last_load"]:
        last_load = get_last_load_date("fpds")
        if not last_load:
            raise ValueError("No last load date for FPDS stored in the database")
        self.load_fpds_incrementally(last_load)

    self.update_award_records(awards=self.modified_award_ids, skip_cd_linkage=False)

    logger.info(f"Script took {datetime.now(timezone.utc) - update_time}")

    if failed_ids:
        # renamed loop variable so it no longer shadows the builtin `id`
        failed_id_str = ", ".join(str(failed_id) for failed_id in failed_ids)
        logger.error(f"The following detached_award_procurement_ids failed to load: [{failed_id_str}]")
        raise SystemExit(1)

    if options["reload_all"] or options["since_last_load"]:
        # we wait until after the load finishes to update the load date because if this crashes we'll need to load again
        update_last_load_date("fpds", update_time)

    # plain string: the original was an f-string with no placeholders (F541)
    logger.info("Successfully Completed")
def test_load_ids_empty():
    """An empty id list should be a harmless no-op and must not raise."""
    no_ids = []
    fpds_loader.load_fpds_transactions(no_ids)
def test_load_ids_dummy_id(
    mock__insert_transaction_fpds_transaction,
    mock__insert_transaction_normalized_transaction,
    mock__update_transaction_fpds_transaction,
    mock__update_transaction_normalized_transaction,
    mock__lookup_existing_transaction,
    mock__insert_award,
    mock__matching_award,
    mock__fy,
    mock__extract_broker_objects,
    mock___fetch_subtier_agency_id,
    mock_connection,
):
    """
    End-to-end unit test (which should not attempt database connections) to exercise the code-under-test
    independently, given fake broker IDs to load
    """
    ###################
    # BEGIN SETUP MOCKS
    ###################

    # Mock output data of key participant functions in this test scenario

    # This is the baseline unconstrained scenario, where all patched functions' MagicMocks will behave as
    # required by the code

    # Mock the broker objects' data
    mock__extract_broker_objects.side_effect = _stub___extract_broker_objects

    ###################
    # END SETUP MOCKS
    ###################

    # Test run of the loader
    dummy_broker_ids = [101, 201, 301]
    fpds_loader.load_fpds_transactions(dummy_broker_ids)

    # Since the mocks will return "data" always when called, if not told to return "None", the branching logic in
    # load_fpds_transactions like: "lookup award, if not exists, create ... lookup transaction, if not exists, create", will
    # always "find" a *mock* award and transaction.
    # So, assert this baseline run followed that logic. That is:
    # - for each broker transaction extracted,
    # - an existing award that it belongs to was found in usaspending
    # - and an existing transaction that it belongs to was found in usaspending

    # One call per transactions to load from broker into usaspending
    assert mock__matching_award.call_count == 3
    assert mock__lookup_existing_transaction.call_count == 3
    assert mock__update_transaction_normalized_transaction.call_count == 3
    assert mock__update_transaction_fpds_transaction.call_count == 3

    # With all broker data being found in usaspending (so no inserts, only updates)
    mock__insert_award.assert_not_called()
    mock__insert_transaction_normalized_transaction.assert_not_called()
    mock__insert_transaction_fpds_transaction.assert_not_called()

    # Check that the correct data (e.g. IDs) are being propagated via the load_objects dictionary from call to call
    # Check only first transaction iteration
    # call_args_list[0][0][1]: first call, positional args, second positional arg (the load_objects dict)
    load_objects_pre_transaction = mock__lookup_existing_transaction.call_args_list[0][0][1]
    # NOTE: this extra call bumps mock__matching_award.call_count, so it must stay AFTER the
    # call_count assertions above. A MagicMock returns the same return_value object on every
    # call, so this yields the same award id the loader received.
    final_award_id = mock__matching_award()

    # Compare data is as expected
    assert load_objects_pre_transaction["award"]["transaction_unique_id"] == str(dummy_broker_ids[0])
    assert load_objects_pre_transaction["transaction_normalized"]["transaction_unique_id"] == str(dummy_broker_ids[0])
    assert load_objects_pre_transaction["transaction_normalized"]["award_id"] == final_award_id
    assert load_objects_pre_transaction["transaction_normalized"]["funding_agency_id"] == 1
    assert load_objects_pre_transaction["transaction_normalized"]["awarding_agency_id"] == 1
    # fiscal_year range presumably reflects the stubbed broker data's action dates — TODO confirm
    # against _stub___extract_broker_objects
    assert 2001 <= load_objects_pre_transaction["transaction_normalized"]["fiscal_year"] <= 2019