Example #1
    def load_fpds_incrementally(self,
                                date: Optional[datetime],
                                chunk_size: int = CHUNK_SIZE) -> None:
        """Process incremental loads based on a date range or full data loads"""

        if date is None:
            logger.info("Skipping deletes. Fetching all fpds transactions...")
        else:
            logger.info(f"Handling fpds transactions since {date}...")

            detached_award_procurement_ids = retrieve_deleted_fpds_transactions(
                start_datetime=date)
            stale_awards = delete_stale_fpds(detached_award_procurement_ids)
            self.update_award_records(awards=stale_awards,
                                      skip_cd_linkage=True)

        with psycopg2.connect(dsn=get_database_dsn_string()) as connection:
            logger.info("Fetching records to update")
            # The third argument selects the count variant of the date query.
            total_records = self.get_cursor_for_date_query(connection, date, True).fetchall()[0][0]
            records_processed = 0
            logger.info(f"{total_records} total records to update")
            cursor = self.get_cursor_for_date_query(connection, date)
            while True:
                id_list = cursor.fetchmany(chunk_size)
                if len(id_list) == 0:
                    break
                logger.info(f"Loading batch (size: {len(id_list)}) from date query...")
                self.modified_award_ids.extend(load_fpds_transactions([row[0] for row in id_list]))
                records_processed += len(id_list)
                logger.info(f"{records_processed} out of {total_records} processed")
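
The fetchmany loop above streams a large result set in fixed-size batches instead of loading every row into memory at once. Below is a minimal standalone sketch of the same pattern, assuming a hypothetical transactions table and local DSN:

import psycopg2

CHUNK_SIZE = 5000  # assumed batch size; the real loader takes this as a parameter

# Minimal sketch of the fetchmany chunking pattern, with a hypothetical table and DSN
with psycopg2.connect(dsn="dbname=example") as connection:
    with connection.cursor() as cursor:
        cursor.execute("SELECT id FROM transactions ORDER BY id")  # hypothetical table
        while True:
            rows = cursor.fetchmany(CHUNK_SIZE)
            if not rows:
                break
            ids = [row[0] for row in rows]
            print(f"processing batch of {len(ids)} ids")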
Example #2
    def load_fpds_from_file(self, file_path: str) -> None:
        """Loads arbitrary set of ids, WITHOUT checking for deletes"""
        total_count = 0
        with RetrieveFileFromUri(file_path).get_file_object() as file:
            logger.info(f"Loading transactions from IDs in {file_path}")
            for next_batch in self.gen_read_file_for_ids(file):
                id_list = [int(re.search(r"\d+", x).group()) for x in next_batch]
                total_count += len(id_list)
                logger.info(f"Loading next batch (size: {len(id_list)}, ids {id_list[0]}-{id_list[-1]})...")
                self.modified_award_ids.extend(load_fpds_transactions(id_list))

        logger.info(f"Total transaction IDs in file: {total_count}")
Example #3
    def delete_and_add_fpds_transaction_records():
        from usaspending_api.broker.management.commands.load_fpds_transactions import Command as FPDSCommand
        from usaspending_api.etl.transaction_loaders.fpds_loader import delete_stale_fpds, load_fpds_transactions

        with Timer("Insert/delete FPDS transactions"):
            delete_ids = get_ids(TEMP_TRANSACTION_FPDS_DELETE_IDS_TABLE)
            add_ids = get_ids(TEMP_TRANSACTION_FPDS_ADD_IDS_TABLE)
            if not delete_ids and not add_ids:
                logger.info("No FPDS transaction records to add or delete")
                return

            # delete_stale_fpds expects its IDs keyed by date strings.
            delete_ids = {date.today().strftime("%Y-%m-%d"): delete_ids}

            fpds_command = FPDSCommand()
            stale_awards = delete_stale_fpds(delete_ids)
            stale_awards.extend(load_fpds_transactions(add_ids))
            fpds_command.update_award_records(awards=stale_awards,
                                              skip_cd_linkage=False)
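
As the comment in the function notes, delete_stale_fpds takes its IDs keyed by date. A sketch of that shape, with illustrative values in place of the IDs pulled from the temp tables:

from datetime import date

# Hypothetical values; the real IDs come from TEMP_TRANSACTION_FPDS_DELETE_IDS_TABLE
delete_ids = {date.today().strftime("%Y-%m-%d"): [1001, 1002, 1003]}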
Example #4
    def handle(self, *args, **options):

        # Record script execution start time to update the FPDS last updated date in DB as appropriate
        update_time = datetime.now(timezone.utc)

        if options["reload_all"]:
            self.load_fpds_incrementally(None)

        elif options["date"]:
            self.load_fpds_incrementally(options["date"])

        elif options["ids"]:
            self.modified_award_ids.extend(
                load_fpds_transactions(options["ids"]))

        elif options["file"]:
            self.load_fpds_from_file(options["file"])

        elif options["since_last_load"]:
            last_load = get_last_load_date("fpds")
            if not last_load:
                raise ValueError(
                    "No last load date for FPDS stored in the database")
            self.load_fpds_incrementally(last_load)

        self.update_award_records(awards=self.modified_award_ids,
                                  skip_cd_linkage=False)

        logger.info(f"Script took {datetime.now(timezone.utc) - update_time}")

        # failed_ids is a module-level list populated by fpds_loader as transactions fail to load.
        if failed_ids:
            failed_id_str = ", ".join(str(failed_id) for failed_id in failed_ids)
            logger.error(
                f"The following detached_award_procurement_ids failed to load: [{failed_id_str}]"
            )
            raise SystemExit(1)

        if options["reload_all"] or options["since_last_load"]:
            # Wait until after the load finishes to update the load date; if the script
            # crashes, the next run will need to pick up the same records again.
            update_last_load_date("fpds", update_time)

        logger.info(f"Successfully Completed")
def test_load_ids_empty():
    """Loading an empty ID list should be a no-op and must not raise."""
    fpds_loader.load_fpds_transactions([])
# Note: the mock.patch decorators that supply these mock arguments are omitted from this excerpt.
def test_load_ids_dummy_id(
    mock__insert_transaction_fpds_transaction,
    mock__insert_transaction_normalized_transaction,
    mock__update_transaction_fpds_transaction,
    mock__update_transaction_normalized_transaction,
    mock__lookup_existing_transaction,
    mock__insert_award,
    mock__matching_award,
    mock__fy,
    mock__extract_broker_objects,
    mock___fetch_subtier_agency_id,
    mock_connection,
):
    """
    End-to-end unit test (which should not attempt database connections) to exercise the code-under-test
    independently, given fake broker IDs to load
    """
    ###################
    # BEGIN SETUP MOCKS
    ###################
    # Mock output data of key participant functions in this test scenario
    # This is the baseline unconstrained scenario, where all patched functions' MagicMocks will behave as
    # required by the code

    # Mock the broker objects' data
    mock__extract_broker_objects.side_effect = _stub___extract_broker_objects

    ###################
    # END SETUP MOCKS
    ###################

    # Test run of the loader
    dummy_broker_ids = [101, 201, 301]
    fpds_loader.load_fpds_transactions(dummy_broker_ids)

    # Since the mocks always return "data" when called (unless told to return None), the branching
    # logic in load_fpds_transactions ("look up award, create if missing; look up transaction,
    # create if missing") will always "find" a *mock* award and transaction.
    # So, assert that this baseline run followed that logic. That is, for each broker transaction
    # extracted:
    # - an existing award that it belongs to was found in usaspending
    # - an existing transaction that it belongs to was found in usaspending

    # One call per transaction to load from broker into usaspending
    assert mock__matching_award.call_count == 3
    assert mock__lookup_existing_transaction.call_count == 3
    assert mock__update_transaction_normalized_transaction.call_count == 3
    assert mock__update_transaction_fpds_transaction.call_count == 3

    # With all broker data being found in usaspending (so no inserts, only updates)
    mock__insert_award.assert_not_called()
    mock__insert_transaction_normalized_transaction.assert_not_called()
    mock__insert_transaction_fpds_transaction.assert_not_called()

    # Check that the correct data (e.g. IDs) is propagated via the load_objects dict from call to call
    # Check only the first transaction iteration
    # call_args_list[0][0][1] = first call, positional args, second positional arg (the load_objects dict)
    load_objects_pre_transaction = mock__lookup_existing_transaction.call_args_list[0][0][1]
    # Calling the mock returns its return_value, the same object the loader stored as the award id
    final_award_id = mock__matching_award()

    # Verify the data is as expected
    assert load_objects_pre_transaction["award"]["transaction_unique_id"] == str(dummy_broker_ids[0])
    assert load_objects_pre_transaction["transaction_normalized"]["transaction_unique_id"] == str(dummy_broker_ids[0])
    assert load_objects_pre_transaction["transaction_normalized"]["award_id"] == final_award_id
    assert load_objects_pre_transaction["transaction_normalized"]["funding_agency_id"] == 1
    assert load_objects_pre_transaction["transaction_normalized"]["awarding_agency_id"] == 1
    assert 2001 <= load_objects_pre_transaction["transaction_normalized"]["fiscal_year"] <= 2019