def perform_load(self, ids_to_delete, ids_to_insert):

        if len(ids_to_delete) > 0:
            with timer("deletion of all stale FPDS data", logger.info):
                self.delete_stale_fpds(ids_to_delete=ids_to_delete)
        else:
            logger.info("No FPDS records to delete at this juncture")

        if len(ids_to_insert) > 0:
            # Add FPDS records
            with timer("insertion of new FPDS data in batches", logger.info):
                self.insert_all_new_fpds(ids_to_insert)

            # Update Awards based on changed FPDS records
            with timer("updating awards to reflect their latest associated transaction info", logger.info):
                update_awards(tuple(AWARD_UPDATE_ID_LIST))

            # Update FPDS-specific Awards based on the info in child transactions
            with timer("updating contract-specific awards to reflect their latest transaction info", logger.info):
                update_contract_awards(tuple(AWARD_UPDATE_ID_LIST))

            # Update AwardCategories based on changed FPDS records
            with timer("updating award category variables", logger.info):
                update_award_categories(tuple(AWARD_UPDATE_ID_LIST))

            # Check the linkages from file C to FPDS records and update any that are missing
            with timer("updating C->D linkages", logger.info):
                update_c_to_d_linkages("contract")
        else:
            logger.info("No FPDS records to insert or modify at this juncture")
def test_award_update_contract_txn_with_list():
    """Test optional parameter to update specific awards from txn contract."""

    awards = mommy.make('awards.Award', _quantity=5)
    txn = mommy.make('awards.TransactionNormalized', award=awards[0])
    mommy.make('awards.TransactionFPDS',
               transaction=txn,
               base_and_all_options_value=1000)
    # single award is updated
    count = update_contract_awards((awards[0].id, ))
    awards[0].refresh_from_db()
    assert count == 1
    assert awards[0].base_and_all_options_value == 1000

    # update multiple awards
    txn1 = mommy.make('awards.TransactionNormalized', award=awards[1])
    mommy.make('awards.TransactionFPDS',
               transaction=txn1,
               base_and_all_options_value=4000)
    txn2 = mommy.make('awards.TransactionNormalized', award=awards[2])
    mommy.make('awards.TransactionFPDS',
               transaction=txn2,
               base_and_all_options_value=5000)
    # multiple awards updated
    count = update_contract_awards((awards[1].id, awards[2].id))
    awards[1].refresh_from_db()
    awards[2].refresh_from_db()
    assert count == 2
    assert awards[1].base_and_all_options_value == 4000
    assert awards[2].base_and_all_options_value == 5000
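These tests create their fixture rows with model_mommy's mommy.make and therefore need database access. A hedged sketch of the scaffolding such a test module would typically assume (the import path for update_contract_awards is a guess based on the project layout and may differ):

import pytest
from model_mommy import mommy

# Assumed location of the helper under test; adjust to the actual module path.
from usaspending_api.etl.award_helpers import update_contract_awards

# pytest-django marker so mommy.make can write to the test database.
pytestmark = pytest.mark.django_db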
Example #3
def test_award_update_contract_txn_with_list():
    """Test optional parameter to update specific awards from txn contract."""

    awards = mommy.make('awards.Award', _quantity=5)
    txn = mommy.make('awards.Transaction', award=awards[0])
    mommy.make('awards.TransactionContract',
               transaction=txn,
               potential_total_value_of_award=1000)
    # single award is updated
    count = update_contract_awards((awards[0].id, ))
    awards[0].refresh_from_db()
    assert count == 1
    assert awards[0].potential_total_value_of_award == 1000

    # update multiple awards
    txn1 = mommy.make('awards.Transaction', award=awards[1])
    mommy.make('awards.TransactionContract',
               transaction=txn1,
               potential_total_value_of_award=4000)
    txn2 = mommy.make('awards.Transaction', award=awards[2])
    mommy.make('awards.TransactionContract',
               transaction=txn2,
               potential_total_value_of_award=5000)
    # multiple awards updated
    count = update_contract_awards((awards[1].id, awards[2].id))
    awards[1].refresh_from_db()
    awards[2].refresh_from_db()
    assert count == 2
    assert awards[1].potential_total_value_of_award == 4000
    assert awards[2].potential_total_value_of_award == 5000
    def handle(self, *args, **options):
        logger.info("==== Starting FPDS nightly data load ====")

        if options.get("date"):
            date = options.get("date")[0]
            date = datetime.strptime(date, "%Y-%m-%d").date()
        else:
            data_load_date_obj = ExternalDataLoadDate.objects.filter(
                external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT["fpds"]
            ).first()
            if not data_load_date_obj:
                date = (datetime.utcnow() - timedelta(days=1)).strftime("%Y-%m-%d")
            else:
                date = data_load_date_obj.last_load_date
        start_date = datetime.utcnow().strftime("%Y-%m-%d")

        logger.info("Processing data for FPDS starting from %s" % date)

        with timer("retrieval of deleted FPDS IDs", logger.info):
            ids_to_delete = self.get_deleted_fpds_data_from_s3(date=date)

        if len(ids_to_delete) > 0:
            with timer("deletion of all stale FPDS data", logger.info):
                self.delete_stale_fpds(ids_to_delete=ids_to_delete)
        else:
            logger.info("No FPDS records to delete at this juncture")

        with timer("retrieval of new/modified FPDS data ID list", logger.info):
            total_insert = self.get_fpds_transaction_ids(date=date)

        if len(total_insert) > 0:
            # Add FPDS records
            with timer("insertion of new FPDS data in batches", logger.info):
                self.insert_all_new_fpds(total_insert)

            # Update Awards based on changed FPDS records
            with timer("updating awards to reflect their latest associated transaction info", logger.info):
                update_awards(tuple(AWARD_UPDATE_ID_LIST))

            # Update FPDS-specific Awards based on the info in child transactions
            with timer("updating contract-specific awards to reflect their latest transaction info", logger.info):
                update_contract_awards(tuple(AWARD_UPDATE_ID_LIST))

            # Update AwardCategories based on changed FPDS records
            with timer("updating award category variables", logger.info):
                update_award_categories(tuple(AWARD_UPDATE_ID_LIST))

            # Check the linkages from file C to FPDS records and update any that are missing
            with timer("updating C->D linkages", logger.info):
                update_c_to_d_linkages("contract")
        else:
            logger.info("No FPDS records to insert or modify at this juncture")

        # Update the date for the last time the data load was run
        ExternalDataLoadDate.objects.filter(external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT["fpds"]).delete()
        ExternalDataLoadDate(
            last_load_date=start_date, external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT["fpds"]
        ).save()

        logger.info("FPDS NIGHTLY UPDATE COMPLETE")
Example #5
    def handle(self, *args, **options):
        logger.info('Starting FPDS nightly data load...')

        if options.get('date'):
            date = options.get('date')[0]
            date = datetime.strptime(date, '%Y-%m-%d').date()
        else:
            data_load_date_obj = ExternalDataLoadDate.objects.filter(
                external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds']).first()
            if not data_load_date_obj:
                date = (datetime.utcnow() -
                        timedelta(days=1)).strftime('%Y-%m-%d')
            else:
                date = data_load_date_obj.last_load_date
        start_date = datetime.utcnow().strftime('%Y-%m-%d')

        logger.info('Processing data for FPDS starting from %s' % date)

        with timer('retrieving/diff-ing FPDS Data', logger.info):
            to_insert, ids_to_delete = self.get_fpds_data(date=date)

        total_rows = len(to_insert)
        total_rows_delete = len(ids_to_delete)

        if total_rows_delete > 0:
            with timer('deleting stale FPDS data', logger.info):
                self.delete_stale_fpds(ids_to_delete=ids_to_delete)
        else:
            logger.info('Nothing to delete...')

        if total_rows > 0:
            with timer('inserting new FPDS data', logger.info):
                self.insert_new_fpds(to_insert=to_insert,
                                     total_rows=total_rows)

            with timer(
                    'updating awards to reflect their latest associated transaction info',
                    logger.info):
                update_awards(tuple(award_update_id_list))

            with timer(
                    'updating contract-specific awards to reflect their latest transaction info',
                    logger.info):
                update_contract_awards(tuple(award_update_id_list))

            with timer('updating award category variables', logger.info):
                update_award_categories(tuple(award_update_id_list))
        else:
            logger.info('Nothing to insert...')

        # Update the date for the last time the data load was run
        ExternalDataLoadDate.objects.filter(
            external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds']
        ).delete()
        ExternalDataLoadDate(last_load_date=start_date,
                             external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds']).save()

        logger.info('FPDS NIGHTLY UPDATE FINISHED!')
    def handle(self, *args, **options):
        logger.info("==== Starting FPDS nightly data load ====")

        if options.get("date"):
            date = options.get("date")[0]
            date = datetime.strptime(date, "%Y-%m-%d").date()
        else:
            default_last_load_date = datetime.now(timezone.utc) - timedelta(days=1)
            date = get_last_load_date("fpds", default=default_last_load_date).date()
        processing_start_datetime = datetime.now(timezone.utc)

        logger.info("Processing data for FPDS starting from %s" % date)

        with timer("retrieval of deleted FPDS IDs", logger.info):
            ids_to_delete = self.get_deleted_fpds_data_from_s3(date=date)

        if len(ids_to_delete) > 0:
            with timer("deletion of all stale FPDS data", logger.info):
                self.delete_stale_fpds(ids_to_delete=ids_to_delete)
        else:
            logger.info("No FPDS records to delete at this juncture")

        with timer("retrieval of new/modified FPDS data ID list", logger.info):
            total_insert = self.get_fpds_transaction_ids(date=date)

        if len(total_insert) > 0:
            # Add FPDS records
            with timer("insertion of new FPDS data in batches", logger.info):
                self.insert_all_new_fpds(total_insert)

            # Update Awards based on changed FPDS records
            with timer("updating awards to reflect their latest associated transaction info", logger.info):
                update_awards(tuple(AWARD_UPDATE_ID_LIST))

            # Update FPDS-specific Awards based on the info in child transactions
            with timer("updating contract-specific awards to reflect their latest transaction info", logger.info):
                update_contract_awards(tuple(AWARD_UPDATE_ID_LIST))

            # Update AwardCategories based on changed FPDS records
            with timer("updating award category variables", logger.info):
                update_award_categories(tuple(AWARD_UPDATE_ID_LIST))

            # Check the linkages from file C to FPDS records and update any that are missing
            with timer("updating C->D linkages", logger.info):
                update_c_to_d_linkages("contract")
        else:
            logger.info("No FPDS records to insert or modify at this juncture")

        # Update the date for the last time the data load was run
        update_last_load_date("fpds", processing_start_datetime)

        logger.info("FPDS NIGHTLY UPDATE COMPLETE")
def test_award_update_contract_executive_comp():
    """Test executive comp is loaded correctly awards from txn contract."""

    award = mommy.make("awards.Award")
    txn = mommy.make("awards.TransactionNormalized", award=award, action_date="2011-10-01")
    txn2 = mommy.make("awards.TransactionNormalized", award=award, action_date="2012-10-01")
    mommy.make(
        "awards.TransactionFPDS",
        transaction=txn,
        officer_1_name="Professor Plum",
        officer_1_amount=1,
        officer_2_name="Mrs. White",
        officer_2_amount=2,
        officer_3_name="Mrs. Peacock",
        officer_3_amount=3,
        officer_4_name="Mr. Green",
        officer_4_amount=4,
        officer_5_name="Colonel Mustard",
        officer_5_amount=5,
    )
    mommy.make(
        "awards.TransactionFPDS",
        transaction=txn2,
        officer_1_name="Jack Mustard",
        officer_1_amount=100,
        officer_2_name="Jacob Green",
        officer_2_amount=200,
        officer_3_name="Diane White",
        officer_3_amount=300,
        officer_4_name="Kasandra Scarlet",
        officer_4_amount=400,
        officer_5_name="Victor Plum",
        officer_5_amount=500,
    )

    update_contract_awards()
    award.refresh_from_db()

    assert award.officer_1_name == "Jack Mustard"
    assert award.officer_5_amount == 500

    # Test that a newer transaction without Executive Comp data doesn't overwrite the award values

    txn3 = mommy.make("awards.TransactionNormalized", award=award, action_date="2013-10-01")
    mommy.make("awards.TransactionFPDS", transaction=txn3)

    update_contract_awards()
    award.refresh_from_db()

    assert award.officer_1_name == "Jack Mustard"
    assert award.officer_5_amount == 500
Example #8
    def handle(self, *args, **options):
        logger.info("Starting historical data load...")

        db_cursor = connections["data_broker"].cursor()
        fiscal_year = options.get("fiscal_year")
        page = options.get("page")
        limit = options.get("limit")

        if fiscal_year:
            fiscal_year = fiscal_year[0]
            logger.info("Processing data for Fiscal Year " + str(fiscal_year))
        else:
            fiscal_year = 2017

        page = page[0] if page else 1
        limit = limit[0] if limit else 500000

        if not options["assistance"]:
            with timer("D1 historical data load", logger.info):
                self.update_transaction_contract(db_cursor=db_cursor,
                                                 fiscal_year=fiscal_year,
                                                 page=page,
                                                 limit=limit)

        if not options["contracts"]:
            with timer("D2 historical data load", logger.info):
                self.update_transaction_assistance(db_cursor=db_cursor,
                                                   fiscal_year=fiscal_year,
                                                   page=page,
                                                   limit=limit)

        with timer(
                "updating awards to reflect their latest associated transaction info",
                logger.info):
            update_awards(tuple(award_update_id_list))

        with timer(
                "updating assistance-specific awards to reflect their latest transaction info",
                logger.info):
            update_assistance_awards(tuple(award_assistance_update_id_list))

        with timer(
                "updating contract-specific awards to reflect their latest transaction info",
                logger.info):
            update_contract_awards(tuple(award_contract_update_id_list))

        # Done!
        logger.info("FINISHED")
def test_award_update_from_contract_transaction():
    """Test award updates specific to contract transactions."""

    # for contract type transactions, the potential_total_value_of_award field
    # should update the corresponding field on the award table
    award = mommy.make('awards.Award')
    txn = mommy.make('awards.Transaction', award=award)
    mommy.make('awards.TransactionContract',
               transaction=txn,
               potential_total_value_of_award=1000)

    update_contract_awards()
    award.refresh_from_db()

    assert award.potential_total_value_of_award == 1000
    def handle(self, *args, **options):
        logger.info('Starting historical data load...')

        db_cursor = connections['data_broker'].cursor()
        fiscal_year = options.get('fiscal_year')
        page = options.get('page')
        limit = options.get('limit')

        if fiscal_year:
            fiscal_year = fiscal_year[0]
            logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
        else:
            fiscal_year = 2017

        page = page[0] if page else 1
        limit = limit[0] if limit else 500000

        if not options['assistance']:
            with timer('D1 historical data load', logger.info):
                self.update_transaction_contract(db_cursor=db_cursor,
                                                 fiscal_year=fiscal_year,
                                                 page=page,
                                                 limit=limit)

        if not options['contracts']:
            with timer('D2 historical data load', logger.info):
                self.update_transaction_assistance(db_cursor=db_cursor,
                                                   fiscal_year=fiscal_year,
                                                   page=page,
                                                   limit=limit)

        with timer(
                'updating awards to reflect their latest associated transaction info',
                logger.info):
            update_awards(tuple(award_update_id_list))

        with timer(
                'updating contract-specific awards to reflect their latest transaction info',
                logger.info):
            update_contract_awards(tuple(award_contract_update_id_list))

        with timer('updating award category variables', logger.info):
            update_award_categories(tuple(award_update_id_list))

        # Done!
        logger.info('FINISHED')
    def handle(self, *args, **options):
        logger.info('Starting updates to award data...')

        all_records_flag = options.get('all')
        fiscal_year = options.get('fiscal_year')

        award_update_id_list = []
        award_contract_update_id_list = []

        if not all_records_flag:
            if fiscal_year:
                fiscal_year = fiscal_year[0]
                logger.info('Processing data for Fiscal Year ' +
                            str(fiscal_year))
            else:
                fiscal_year = 2017

            # Lists to store for update_awards and update_contract_awards
            award_update_id_list = TransactionNormalized.objects.filter(action_date__fy=fiscal_year).\
                values_list('award_id', flat=True)
            award_contract_update_id_list = TransactionFPDS.objects.filter(action_date__fy=fiscal_year).\
                values_list('transaction__award_id', flat=True)

        with timer(
                'updating awards to reflect their latest associated transaction info',
                logger.info):
            if all_records_flag:
                update_awards()
            else:
                update_awards(tuple(award_update_id_list))

        with timer(
                'updating contract-specific awards to reflect their latest transaction info...',
                logger.info):
            if all_records_flag:
                update_contract_awards()
            else:
                update_contract_awards(tuple(award_contract_update_id_list))

        with timer('updating award category variables', logger.info):
            if all_records_flag:
                update_award_categories()
            else:
                update_award_categories(tuple(award_update_id_list))

        # Done!
        logger.info('FINISHED')
def test_award_update_from_contract_transaction():
    """Test award updates specific to contract transactions."""

    # for contract type transactions, the base_and_all_options_value field should update the corresponding field on
    # the award table
    award = mommy.make('awards.Award')
    txn = mommy.make('awards.TransactionNormalized', award=award)
    txn2 = mommy.make('awards.TransactionNormalized', award=award)
    mommy.make('awards.TransactionFPDS',
               transaction=txn,
               base_and_all_options_value=1000)
    mommy.make('awards.TransactionFPDS',
               transaction=txn2,
               base_and_all_options_value=1001)

    update_contract_awards()
    award.refresh_from_db()

    assert award.base_and_all_options_value == 2001
def test_award_update_from_contract_transaction():
    """Test award updates specific to contract transactions."""

    # for contract type transactions, the base_and_all_options_value and base_exercised_options_val fields
    # should update the corresponding field on the award table
    award = mommy.make("awards.Award")
    txn = mommy.make("awards.TransactionNormalized", award=award)
    txn2 = mommy.make("awards.TransactionNormalized", award=award)
    mommy.make(
        "awards.TransactionFPDS", transaction=txn, base_and_all_options_value=1000, base_exercised_options_val=100
    )
    mommy.make(
        "awards.TransactionFPDS", transaction=txn2, base_and_all_options_value=1001, base_exercised_options_val=101
    )

    update_contract_awards()
    award.refresh_from_db()

    assert award.base_and_all_options_value == 2001
    assert award.base_exercised_options_val == 201
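Taken together, the assertions above imply that update_contract_awards sums base_and_all_options_value (and base_exercised_options_val) across all of an award's FPDS transactions rather than taking only the latest value: 1000 + 1001 yields 2001. A hedged ORM sketch of that rollup; the real helper may well be implemented differently (for example, as raw SQL):

from django.db.models import Sum

# Hypothetical equivalent of the aggregation the tests exercise;
# award_ids stands in for the tuple passed to update_contract_awards.
totals = (
    TransactionFPDS.objects.filter(transaction__award_id__in=award_ids)
    .values("transaction__award_id")
    .annotate(
        total_base_and_all_options=Sum("base_and_all_options_value"),
        total_base_exercised_options=Sum("base_exercised_options_val"),
    )
)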
    def handle(self, *args, **options):
        logger.info('Starting historical data load...')

        db_cursor = connections['data_broker'].cursor()
        fiscal_year = options.get('fiscal_year')
        page = options.get('page')
        limit = options.get('limit')

        if fiscal_year:
            fiscal_year = fiscal_year[0]
            logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
        else:
            fiscal_year = 2017

        page = page[0] if page else 1
        limit = limit[0] if limit else 500000

        if not options['assistance']:
            with timer('D1 historical data load', logger.info):
                self.update_transaction_contract(db_cursor=db_cursor, fiscal_year=fiscal_year, page=page, limit=limit)

        if not options['contracts']:
            with timer('D2 historical data load', logger.info):
                self.update_transaction_assistance(db_cursor=db_cursor, fiscal_year=fiscal_year, page=page, limit=limit)

        with timer('updating awards to reflect their latest associated transaction info', logger.info):
            update_awards(tuple(award_update_id_list))

        with timer('updating contract-specific awards to reflect their latest transaction info', logger.info):
            update_contract_awards(tuple(award_contract_update_id_list))

        with timer('updating award category variables', logger.info):
            update_award_categories(tuple(award_update_id_list))

        # Done!
        logger.info('FINISHED')
    def handle(self, *args, **options):
        logger.info('Starting updates to award data...')

        all_records_flag = options.get('all')
        fiscal_year = options.get('fiscal_year')

        award_update_id_list = []
        award_contract_update_id_list = []

        if not all_records_flag:
            if fiscal_year:
                fiscal_year = fiscal_year[0]
                logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
            else:
                fiscal_year = 2017

            # Lists to store for update_awards and update_contract_awards
            award_update_id_list = TransactionNormalized.objects.filter(action_date__fy=fiscal_year).\
                values_list('award_id', flat=True)
            award_contract_update_id_list = TransactionFPDS.objects.filter(action_date__fy=fiscal_year).\
                values_list('transaction__award_id', flat=True)

        with timer('updating awards to reflect their latest associated transaction info', logger.info):
            update_awards() if all_records_flag else update_awards(tuple(award_update_id_list))

        with timer('updating contract-specific awards to reflect their latest transaction info...', logger.info):
            if all_records_flag:
                update_contract_awards()
            else:
                update_contract_awards(tuple(award_contract_update_id_list))

        with timer('updating award category variables', logger.info):
            update_award_categories() if all_records_flag else update_award_categories(tuple(award_update_id_list))

        # Done!
        logger.info('FINISHED')
    def handle(self, *args, **options):

        awards_cache.clear()

        # Grab the data broker database connections
        if not options['test']:
            try:
                db_conn = connections['data_broker']
                db_cursor = db_conn.cursor()
            except Exception as err:
                logger.critical(
                    'Could not connect to database. Is DATA_BROKER_DATABASE_URL set?'
                )
                logger.critical(err)
                return
        else:
            db_cursor = PhonyCursor()

        # Grab the submission id
        submission_id = options['submission_id'][0]

        # Verify the ID exists in the database, and grab the data
        db_cursor.execute('SELECT * FROM submission WHERE submission_id = %s',
                          [submission_id])
        submission_data = dictfetchall(db_cursor)

        if len(submission_data) == 0:
            logger.error('Could not find submission with id ' +
                         str(submission_id))
            return
        elif len(submission_data) > 1:
            logger.error('Found multiple submissions with id ' +
                         str(submission_id))
            return

        # We have a single submission, which is what we want
        submission_data = submission_data[0]
        broker_submission_id = submission_data['submission_id']
        del submission_data['submission_id']  # To avoid collisions with the newer PK system
        submission_attributes = get_submission_attributes(
            broker_submission_id, submission_data)

        # Move on, and grab file A data
        db_cursor.execute(
            'SELECT * FROM appropriation WHERE submission_id = %s',
            [submission_id])
        appropriation_data = dictfetchall(db_cursor)
        logger.info('Acquired appropriation data for ' + str(submission_id) +
                    ', there are ' + str(len(appropriation_data)) + ' rows.')
        load_file_a(submission_attributes, appropriation_data, db_cursor)

        # Let's get File B information
        db_cursor.execute(
            'SELECT * FROM object_class_program_activity WHERE submission_id = %s',
            [submission_id])
        prg_act_obj_cls_data = dictfetchall(db_cursor)
        logger.info('Acquired program activity object class data for ' +
                    str(submission_id) + ', there are ' +
                    str(len(prg_act_obj_cls_data)) + ' rows.')
        load_file_b(submission_attributes, prg_act_obj_cls_data, db_cursor)

        # File D2
        db_cursor.execute(
            'SELECT * FROM award_financial_assistance WHERE submission_id = %s',
            [submission_id])
        award_financial_assistance_data = dictfetchall(db_cursor)
        logger.info('Acquired award financial assistance data for ' +
                    str(submission_id) + ', there are ' +
                    str(len(award_financial_assistance_data)) + ' rows.')
        load_file_d2(submission_attributes, award_financial_assistance_data,
                     db_cursor)

        # File D1
        db_cursor.execute(
            'SELECT * FROM award_procurement WHERE submission_id = %s',
            [submission_id])
        procurement_data = dictfetchall(db_cursor)
        logger.info('Acquired award procurement data for ' +
                    str(submission_id) + ', there are ' +
                    str(len(procurement_data)) + ' rows.')
        load_file_d1(submission_attributes, procurement_data, db_cursor)

        # Let's get File C information
        # Note: we load File C last, because the D1 and D2 files have the awarding
        # agency top tier (CGAC) and sub tier data needed to look up/create
        # the most specific possible corresponding award. When looking up/
        # creating awards for File C, we don't have sub-tier agency info, so
        # we'll do our best to match them to the more specific award records
        # already created by the D file load
        db_cursor.execute(
            'SELECT * FROM award_financial WHERE submission_id = %s',
            [submission_id])
        award_financial_data = dictfetchall(db_cursor)
        logger.info('Acquired award financial data for ' + str(submission_id) +
                    ', there are ' + str(len(award_financial_data)) + ' rows.')
        load_file_c(submission_attributes, award_financial_data, db_cursor)

        # Once all the files have been processed, run any global
        # cleanup/post-load tasks.
        # 1. Update the descriptions TODO: If this is slow, add ID limiting as above
        update_model_description_fields()
        # 2. Update awards to reflect their latest associated txn info
        update_awards(tuple(AWARD_UPDATE_ID_LIST))
        # 3. Update contract-specific award fields to reflect latest txn info
        update_contract_awards(tuple(AWARD_CONTRACT_UPDATE_ID_LIST))
Example #17
    def handle(self, *args, **options):

        csv_file = options['file'][0]
        self.logger.info("Starting load for file {}".format(csv_file))

        # Create the csv reader
        reader = CsvDataReader(csv_file)

        # Create a new submission attributes object for this timestamp
        subattr = SubmissionAttributes()
        subattr.usaspending_update = datetime.now()
        subattr.save()

        # Create lists to hold model instances for bulk insert
        txn_list = []
        txn_contract_list = []

        subtier_agency_dict = h.get_subtier_agency_dict()

        # Store some additional support data needed for the load
        award_type_dict = {a[0]: a[1] for a in AWARD_TYPES}
        contract_pricing_dict = {c[0]: c[1] for c in CONTRACT_PRICING_TYPES}

        for idx, row in enumerate(reader):
            if idx % 1000 == 0:
                self.logger.info("Read row {}".format(idx))
            row = h.cleanse_values(row)

            awarding_agency_id = self.get_agency_id(row["contractingofficeagencyid"], subtier_agency_dict)

            # Create the transaction object for this row
            txn_dict = {
                "action_date": h.convert_date(row['signeddate']),
                "award": self.get_or_create_award(row, awarding_agency_id),
                "awarding_agency_id": awarding_agency_id,
                "data_source": "USA",
                "description": row["descriptionofcontractrequirement"],
                "federal_action_obligation": row["dollarsobligated"],
                "funding_agency_id": self.get_agency_id(row["fundingrequestingagencyid"], subtier_agency_dict),
                "last_modified_date": h.convert_date(row['last_modified_date']),
                "modification_number": row["modnumber"],
                "place_of_performance": h.get_or_create_location(
                    row, mapper=location_mapper_place_of_performance),
                "period_of_performance_current_end_date": h.convert_date(row['currentcompletiondate']),
                "period_of_performance_start_date": h.convert_date(row['effectivedate']),
                "recipient": self.get_or_create_recipient(row),
                "submission": subattr,
                "type": evaluate_contract_award_type(row),
                "type_description": award_type_dict.get(evaluate_contract_award_type(row)),
                "usaspending_unique_transaction_id": row["unique_transaction_id"]
            }
            txn = Transaction(**txn_dict)
            txn_list.append(txn)

            # Create the transaction contract object for this row
            txn_contract_dict = {
                "submission": subattr,
                "piid": row['piid'],
                "parent_award_id": row['idvpiid'],
                "current_total_value_award": h.parse_numeric_value(row["baseandexercisedoptionsvalue"]),
                "period_of_performance_potential_end_date": h.convert_date(row['ultimatecompletiondate']),
                "potential_total_value_of_award": h.parse_numeric_value(row["baseandalloptionsvalue"]),
                "epa_designated_product": self.parse_first_character(row['useofepadesignatedproducts']),
                "gfe_gfp": h.up2colon(row['gfe_gfp']),
                "cost_or_pricing_data": h.up2colon(row['costorpricingdata']),
                "type_of_contract_pricing": h.up2colon(row['typeofcontractpricing']),
                "type_of_contract_pricing_description": contract_pricing_dict.get(h.up2colon(row['typeofcontractpricing'])),
                "multiple_or_single_award_idv": h.up2colon(row['multipleorsingleawardidc']),
                "naics": h.up2colon(row['nationalinterestactioncode']),
                "dod_claimant_program_code": h.up2colon(row['claimantprogramcode']),
                "commercial_item_acquisition_procedures": h.up2colon(
                    row['commercialitemacquisitionprocedures']),
                "commercial_item_test_program": h.up2colon(row['commercialitemtestprogram']),
                "consolidated_contract": h.up2colon(row['consolidatedcontract']),
                "contingency_humanitarian_or_peacekeeping_operation": h.up2colon(
                    row['contingencyhumanitarianpeacekeepingoperation']),
                "contract_bundling": h.up2colon(row['contractbundling']),
                "contract_financing": h.up2colon(row['contractfinancing']),
                "contracting_officers_determination_of_business_size": h.up2colon(
                    row['contractingofficerbusinesssizedetermination']),
                "country_of_product_or_service_origin": h.up2colon(row['countryoforigin']),
                "davis_bacon_act": h.up2colon(row['davisbaconact']),
                "evaluated_preference": h.up2colon(row['evaluatedpreference']),
                "extent_competed": h.up2colon(row['extentcompeted']),
                "information_technology_commercial_item_category": h.up2colon(
                    row['informationtechnologycommercialitemcategory']),
                "interagency_contracting_authority": h.up2colon(row['interagencycontractingauthority']),
                "local_area_set_aside": h.up2colon(row['localareasetaside']),
                "purchase_card_as_payment_method": h.up2colon(row['purchasecardaspaymentmethod']),
                "multi_year_contract": h.up2colon(row['multiyearcontract']),
                "national_interest_action": h.up2colon(row['nationalinterestactioncode']),
                "number_of_actions": h.up2colon(row['numberofactions']),
                "number_of_offers_received": h.up2colon(row['numberofoffersreceived']),
                "performance_based_service_acquisition": h.up2colon(row['performancebasedservicecontract']),
                "place_of_manufacture": h.up2colon(row['placeofmanufacture']),
                "product_or_service_code": h.up2colon(row['productorservicecode']),
                "recovered_materials_sustainability": h.up2colon(row['recoveredmaterialclauses']),
                "research": h.up2colon(row['research']),
                "sea_transportation": h.up2colon(row['seatransportation']),
                "service_contract_act": h.up2colon(row['servicecontractact']),
                "small_business_competitiveness_demonstration_program": self.parse_first_character(
                    row['smallbusinesscompetitivenessdemonstrationprogram']),
                "solicitation_procedures": h.up2colon(row['solicitationprocedures']),
                "subcontracting_plan": h.up2colon(row['subcontractplan']),
                "type_set_aside": h.up2colon(row['typeofsetaside']),
                "walsh_healey_act": h.up2colon(row['walshhealyact']),
                "rec_flag": self.parse_first_character(h.up2colon(row['rec_flag'])),
                "type_of_idc": self.parse_first_character(row['typeofidc']),
                "a76_fair_act_action": self.parse_first_character(row['a76action']),
                "clinger_cohen_act_planning": self.parse_first_character(row['clingercohenact']),
                "cost_accounting_standards": self.parse_first_character(
                    row['costaccountingstandardsclause']),
                "fed_biz_opps": self.parse_first_character(row['fedbizopps']),
                "foreign_funding": self.parse_first_character(row['fundedbyforeignentity']),
                "major_program": self.parse_first_character(row['majorprogramcode']),
                "program_acronym": self.parse_first_character(row['programacronym']),
                "referenced_idv_modification_number": self.parse_first_character(
                    row['idvmodificationnumber']),
                "transaction_number": self.parse_first_character(row['transactionnumber']),
                "solicitation_identifier": self.parse_first_character(row['solicitationid'])
            }
            txn_contract = TransactionContract(**txn_contract_dict)
            txn_contract_list.append(txn_contract)

        # Bulk insert transaction rows
        self.logger.info("Starting Transaction bulk insert ({} records)".format(len(txn_list)))
        Transaction.objects.bulk_create(txn_list)
        self.logger.info("Completed Transaction bulk insert")
        # Update txn contract list with newly-inserted transactions
        award_id_list = []  # we'll need this when updating the awards later on
        for idx, t in enumerate(txn_contract_list):
            # add transaction info to this TransactionContract object
            t.transaction = txn_list[idx]
            # add the corresponding award id to a list we'll use when batch-updating award data
            award_id_list.append(txn_list[idx].award_id)
        # Bulk insert transaction contract rows
        self.logger.info("Starting TransactionContract bulk insert ({} records)".format(len(txn_contract_list)))
        TransactionContract.objects.bulk_create(txn_contract_list)
        self.logger.info("Completed TransactionContract bulk insert")

        # Update awards to reflect latest transaction information
        # (note that this can't be done via signals or a save()
        # override in the model itself, because those aren't
        # triggered by a bulk update)
        self.logger.info("Starting Awards update")
        count = update_awards(tuple(award_id_list))
        update_contract_awards(tuple(award_id_list))
        self.logger.info("Completed Awards update ({} records)".format(count))
    def handle(self, *args, **options):
        logger.info('Starting FPDS bulk data load...')

        db_cursor = connections['data_broker'].cursor()
        ds_cursor = connection.cursor()
        fiscal_year = options.get('fiscal_year')

        if fiscal_year:
            fiscal_year = fiscal_year[0]
        else:
            fiscal_year = 2017

        logger.info('Processing data for Fiscal Year ' + str(fiscal_year))

        with timer('Diff-ing FPDS data', logger.info):
            to_insert, to_delete = self.diff_fpds_data(db_cursor=db_cursor,
                                                       ds_cursor=ds_cursor,
                                                       fiscal_year=fiscal_year)

        total_rows = len(to_insert)
        total_rows_delete = len(to_delete)

        if total_rows_delete > 0:
            with timer('Deleting stale FPDS data', logger.info):
                self.delete_stale_fpds(to_delete=to_delete)

        if total_rows > 0:
            # Set lookups after deletions to only get latest
            self.set_lookup_maps()

            with timer('Get Broker FPDS data', logger.info):
                fpds_broker_data = self.get_fpds_data(db_cursor=db_cursor,
                                                      fiscal_year=fiscal_year,
                                                      to_insert=to_insert)

            with timer('Loading POP Location data', logger.info):
                self.load_locations(fpds_broker_data=fpds_broker_data,
                                    total_rows=total_rows,
                                    pop_flag=True)

            with timer('Loading LE Location data', logger.info):
                self.load_locations(fpds_broker_data=fpds_broker_data,
                                    total_rows=total_rows)

            with timer('Loading Legal Entity data', logger.info):
                self.load_legal_entity(fpds_broker_data=fpds_broker_data,
                                       total_rows=total_rows)

            with timer('Loading Parent Award data', logger.info):
                self.load_parent_awards(fpds_broker_data=fpds_broker_data,
                                        total_rows=total_rows)

            with timer('Loading Award data', logger.info):
                self.load_awards(fpds_broker_data=fpds_broker_data,
                                 total_rows=total_rows)

            with timer('Loading Transaction Normalized data', logger.info):
                self.load_transaction_normalized(
                    fpds_broker_data=fpds_broker_data, total_rows=total_rows)

            with timer('Loading Transaction FPDS data', logger.info):
                self.load_transaction_fpds(fpds_broker_data=fpds_broker_data,
                                           total_rows=total_rows)

            award_update_id_list = [award.id for award in award_lookup]

            with timer(
                    'Updating awards to reflect their latest associated transaction info',
                    logger.info):
                update_awards(tuple(award_update_id_list))

            with timer(
                    'Updating contract-specific awards to reflect their latest transaction info',
                    logger.info):
                update_contract_awards(tuple(award_update_id_list))

            with timer('Updating award category variables', logger.info):
                update_award_categories(tuple(award_update_id_list))
        else:
            logger.info('Nothing to insert...FINISHED!')
    def handle(self, *args, **options):

        h.clear_caches()

        csv_file = options['file'][0]
        self.logger.info("Starting load for file {}".format(csv_file))

        # Create the csv reader
        reader = CsvDataReader(csv_file)

        # Create a new submission attributes object for this timestamp
        subattr = SubmissionAttributes()
        subattr.usaspending_update = datetime.now()
        subattr.save()

        # Create lists to hold model instances for bulk insert
        txn_list = []
        txn_assistance_list = []

        for idx, row in enumerate(reader):
            if idx % 1000 == 0:
                self.logger.info("Read row {}".format(idx))
            row = h.cleanse_values(row)

            awarding_agency = self.get_awarding_agency(row)  # todo: use agency dict?

            # Create the transaction object for this row
            txn_dict = {
                "submission": subattr,
                "action_date": h.convert_date(row['obligation_action_date']),
                "action_type": h.up2colon(row['action_type']),
                "award": self.get_or_create_award(row, awarding_agency=awarding_agency),
                "awarding_agency": awarding_agency,
                "description": row["project_description"],  # ?? account_title is another contender?
                "data_source": "USA",
                "federal_action_obligation": row["fed_funding_amount"],
                "last_modified_date": h.convert_date(row['last_modified_date']),
                "modification_number": row["federal_award_mod"],  # ??
                "period_of_performance_start_date": h.convert_date(row['starting_date']),
                "period_of_performance_current_end_date": h.convert_date(row['ending_date']),
                "place_of_performance": h.get_or_create_location(row, location_mapper_fin_assistance_principal_place),
                "recipient": self.get_or_create_recipient(row),
                "type": h.up2colon(row['assistance_type']),
                "usaspending_unique_transaction_id": row["unique_transaction_id"],

                # ?? "funding_agency_id":
                # ?? "certified date":
            }
            txn = Transaction(**txn_dict)
            txn.fiscal_year = fy(txn.action_date)
            txn_list.append(txn)

            # Create the transaction contract object for this row
            txn_assistance_dict = {
                "submission": subattr,
                "fain": row["federal_award_id"],
                "uri": row["uri"],
                "cfda": Cfda.objects.filter(program_number=row["cfda_program_num"]).first(),
                "correction_late_delete_indicator": h.up2colon(row['correction_late_ind']),
                "face_value_loan_guarantee": row["face_loan_guran"],
                "fiscal_year_and_quarter_correction": row["fyq_correction"],
                "non_federal_funding_amount": row["non_fed_funding_amount"],
                "original_loan_subsidy_cost": row["orig_sub_guran"],  # ??
                "record_type": int(h.up2colon(row['record_type'])),
                "sai_number": row["sai_number"],
                "submitted_type": "C",  # ?? For CSV?
            }
            # ?? business_funds_indicator
            # ?? reporting period start/end??

            txn_assistance = TransactionAssistance(**txn_assistance_dict)
            txn_assistance_list.append(txn_assistance)

        # Bulk insert transaction rows
        self.logger.info(
            "Starting Transaction bulk insert ({} records)".format(
                len(txn_list)))
        Transaction.objects.bulk_create(txn_list)
        self.logger.info("Completed Transaction bulk insert")
        # Update txn assistance list with newly-inserted transactions
        award_id_list = []  # we'll need this when updating the awards later on
        for idx, t in enumerate(txn_assistance_list):
            t.transaction = txn_list[idx]
            award_id_list.append(txn_list[idx].award_id)
        # Bulk insert transaction assistance rows
        self.logger.info(
            "Starting TransactionAssistance bulk insert ({} records)".format(
                len(txn_assistance_list)))
        TransactionAssistance.objects.bulk_create(txn_assistance_list)
        self.logger.info("Completed TransactionAssistance bulk insert")

        # Update awards to reflect latest transaction information
        # (note that this can't be done via signals or a save()
        # override in the model itself, because those aren't
        # triggered by a bulk update)
        self.logger.info("Starting Awards update")
        count = update_awards(tuple(award_id_list))
        update_contract_awards(tuple(award_id_list))
        update_model_description_fields()
        self.logger.info("Completed Awards update ({} records)".format(count))
    def handle(self, *args, **options):
        logger.info('Starting FPDS bulk data load...')

        db_cursor = connections['data_broker'].cursor()
        ds_cursor = connection.cursor()
        fiscal_year = options.get('fiscal_year')

        if fiscal_year:
            fiscal_year = fiscal_year[0]
        else:
            fiscal_year = 2017

        logger.info('Processing data for Fiscal Year ' + str(fiscal_year))

        with timer('Diff-ing FPDS data', logger.info):
            to_insert, to_delete = self.diff_fpds_data(db_cursor=db_cursor,
                                                       ds_cursor=ds_cursor,
                                                       fiscal_year=fiscal_year)

        total_rows = len(to_insert)
        total_rows_delete = len(to_delete)

        if total_rows_delete > 0:
            with timer('Deleting stale FPDS data', logger.info):
                self.delete_stale_fpds(to_delete=to_delete)

        if total_rows > 0:
            # Set lookups after deletions to only get latest
            self.set_lookup_maps()

            with timer('Get Broker FPDS data', logger.info):
                fpds_broker_data = self.get_fpds_data(
                    db_cursor=db_cursor, fiscal_year=fiscal_year, to_insert=to_insert)

            with timer('Loading POP Location data', logger.info):
                self.load_locations(fpds_broker_data=fpds_broker_data, total_rows=total_rows, pop_flag=True)

            with timer('Loading LE Location data', logger.info):
                self.load_locations(fpds_broker_data=fpds_broker_data, total_rows=total_rows)

            with timer('Loading Legal Entity data', logger.info):
                self.load_legal_entity(fpds_broker_data=fpds_broker_data, total_rows=total_rows)

            with timer('Loading Parent Award data', logger.info):
                self.load_parent_awards(fpds_broker_data=fpds_broker_data, total_rows=total_rows)

            with timer('Loading Award data', logger.info):
                self.load_awards(fpds_broker_data=fpds_broker_data, total_rows=total_rows)

            with timer('Loading Transaction Normalized data', logger.info):
                self.load_transaction_normalized(fpds_broker_data=fpds_broker_data, total_rows=total_rows)

            with timer('Loading Transaction FPDS data', logger.info):
                self.load_transaction_fpds(fpds_broker_data=fpds_broker_data, total_rows=total_rows)

            award_update_id_list = [award.id for award in award_lookup]

            with timer('Updating awards to reflect their latest associated transaction info', logger.info):
                update_awards(tuple(award_update_id_list))

            with timer('Updating contract-specific awards to reflect their latest transaction info', logger.info):
                update_contract_awards(tuple(award_update_id_list))

            with timer('Updating award category variables', logger.info):
                update_award_categories(tuple(award_update_id_list))
        else:
            logger.info('Nothing to insert...FINISHED!')