def perform_load(self, ids_to_delete, ids_to_insert):
    """Delete stale FPDS records, insert new/modified ones, then refresh derived award data.

    ids_to_delete -- FPDS record ids that disappeared upstream and must be removed
    ids_to_insert -- FPDS record ids that are new or changed and must be (re)loaded
    """
    if ids_to_delete:
        with timer("deletion of all stale FPDS data", logger.info):
            self.delete_stale_fpds(ids_to_delete=ids_to_delete)
    else:
        logger.info("No FPDS records to delete at this juncture")

    if not ids_to_insert:
        logger.info("No FPDS records to insert or modify at this juncture")
        return

    # Add FPDS records
    with timer("insertion of new FPDS data in batches", logger.info):
        self.insert_all_new_fpds(ids_to_insert)

    # Update Awards based on changed FPDS records
    with timer("updating awards to reflect their latest associated transaction info", logger.info):
        update_awards(tuple(AWARD_UPDATE_ID_LIST))

    # Update FPDS-specific Awards based on the info in child transactions
    with timer("updating contract-specific awards to reflect their latest transaction info", logger.info):
        update_contract_awards(tuple(AWARD_UPDATE_ID_LIST))

    # Update AwardCategories based on changed FPDS records
    with timer("updating award category variables", logger.info):
        update_award_categories(tuple(AWARD_UPDATE_ID_LIST))

    # Check the linkages from file C to FPDS records and update any that are missing
    with timer("updating C->D linkages", logger.info):
        update_c_to_d_linkages("contract")
def test_award_update_contract_txn_with_list():
    """Test optional parameter to update specific awards from txn contract."""
    awards = mommy.make('awards.Award', _quantity=5)

    # a single award with one FPDS transaction
    first_txn = mommy.make('awards.TransactionNormalized', award=awards[0])
    mommy.make('awards.TransactionFPDS', transaction=first_txn, base_and_all_options_value=1000)

    # updating just that one award should touch exactly one record
    count = update_contract_awards((awards[0].id, ))
    awards[0].refresh_from_db()
    assert count == 1
    assert awards[0].base_and_all_options_value == 1000

    # attach FPDS transactions to two more awards
    for award, value in ((awards[1], 4000), (awards[2], 5000)):
        txn = mommy.make('awards.TransactionNormalized', award=award)
        mommy.make('awards.TransactionFPDS', transaction=txn, base_and_all_options_value=value)

    # multiple awards updated in one call
    count = update_contract_awards((awards[1].id, awards[2].id))
    awards[1].refresh_from_db()
    awards[2].refresh_from_db()
    assert count == 2
    assert awards[1].base_and_all_options_value == 4000
    assert awards[2].base_and_all_options_value == 5000
def test_award_update_contract_txn_with_list():
    """Test optional parameter to update specific awards from txn contract."""
    awards = mommy.make('awards.Award', _quantity=5)

    # one award with a single contract transaction
    first_txn = mommy.make('awards.Transaction', award=awards[0])
    mommy.make('awards.TransactionContract', transaction=first_txn, potential_total_value_of_award=1000)

    # updating just that award should touch exactly one record
    count = update_contract_awards((awards[0].id, ))
    awards[0].refresh_from_db()
    assert count == 1
    assert awards[0].potential_total_value_of_award == 1000

    # give two more awards their own contract transactions
    for award, value in ((awards[1], 4000), (awards[2], 5000)):
        txn = mommy.make('awards.Transaction', award=award)
        mommy.make('awards.TransactionContract', transaction=txn, potential_total_value_of_award=value)

    # multiple awards updated in one call
    count = update_contract_awards((awards[1].id, awards[2].id))
    awards[1].refresh_from_db()
    awards[2].refresh_from_db()
    assert count == 2
    assert awards[1].potential_total_value_of_award == 4000
    assert awards[2].potential_total_value_of_award == 5000
def handle(self, *args, **options):
    """Run the FPDS nightly load: delete stale records, insert new ones, refresh awards."""
    logger.info("==== Starting FPDS nightly data load ====")

    # Start from the explicit --date if given, otherwise resume from the last
    # recorded load date (falling back to yesterday when none is recorded).
    if options.get("date"):
        date = options.get("date")[0]
        date = datetime.strptime(date, "%Y-%m-%d").date()
    else:
        data_load_date_obj = ExternalDataLoadDate.objects.filter(
            external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT["fpds"]
        ).first()
        if not data_load_date_obj:
            date = (datetime.utcnow() - timedelta(days=1)).strftime("%Y-%m-%d")
        else:
            date = data_load_date_obj.last_load_date
    # Captured before processing so the next run re-covers anything loaded during this one
    start_date = datetime.utcnow().strftime("%Y-%m-%d")

    logger.info("Processing data for FPDS starting from %s" % date)

    with timer("retrieval of deleted FPDS IDs", logger.info):
        ids_to_delete = self.get_deleted_fpds_data_from_s3(date=date)

    if ids_to_delete:
        with timer("deletion of all stale FPDS data", logger.info):
            self.delete_stale_fpds(ids_to_delete=ids_to_delete)
    else:
        logger.info("No FPDS records to delete at this juncture")

    with timer("retrieval of new/modified FPDS data ID list", logger.info):
        total_insert = self.get_fpds_transaction_ids(date=date)

    if total_insert:
        # Add FPDS records
        with timer("insertion of new FPDS data in batches", logger.info):
            self.insert_all_new_fpds(total_insert)

        # Update Awards based on changed FPDS records
        with timer("updating awards to reflect their latest associated transaction info", logger.info):
            update_awards(tuple(AWARD_UPDATE_ID_LIST))

        # Update FPDS-specific Awards based on the info in child transactions
        with timer("updating contract-specific awards to reflect their latest transaction info", logger.info):
            update_contract_awards(tuple(AWARD_UPDATE_ID_LIST))

        # Update AwardCategories based on changed FPDS records
        with timer("updating award category variables", logger.info):
            update_award_categories(tuple(AWARD_UPDATE_ID_LIST))

        # Check the linkages from file C to FPDS records and update any that are missing
        with timer("updating C->D linkages", logger.info):
            update_c_to_d_linkages("contract")
    else:
        logger.info("No FPDS records to insert or modify at this juncture")

    # Update the date for the last time the data load was run
    ExternalDataLoadDate.objects.filter(
        external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT["fpds"]
    ).delete()
    ExternalDataLoadDate(
        last_load_date=start_date, external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT["fpds"]
    ).save()

    logger.info("FPDS NIGHTLY UPDATE COMPLETE")
def handle(self, *args, **options):
    """Nightly FPDS load: diff against broker data, apply deletes/inserts, refresh awards."""
    logger.info('Starting FPDS nightly data load...')

    # Resolve the starting date: explicit --date wins, otherwise the last
    # recorded load date, otherwise yesterday.
    if options.get('date'):
        date = options.get('date')[0]
        date = datetime.strptime(date, '%Y-%m-%d').date()
    else:
        data_load_date_obj = ExternalDataLoadDate.objects.filter(
            external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds']
        ).first()
        if not data_load_date_obj:
            date = (datetime.utcnow() - timedelta(days=1)).strftime('%Y-%m-%d')
        else:
            date = data_load_date_obj.last_load_date
    # Recorded up front so the next run re-covers anything changed mid-load
    start_date = datetime.utcnow().strftime('%Y-%m-%d')

    logger.info('Processing data for FPDS starting from %s' % date)

    with timer('retrieving/diff-ing FPDS Data', logger.info):
        to_insert, ids_to_delete = self.get_fpds_data(date=date)

    total_rows = len(to_insert)
    total_rows_delete = len(ids_to_delete)

    if total_rows_delete > 0:
        with timer('deleting stale FPDS data', logger.info):
            self.delete_stale_fpds(ids_to_delete=ids_to_delete)
    else:
        logger.info('Nothing to delete...')

    if total_rows > 0:
        with timer('inserting new FPDS data', logger.info):
            self.insert_new_fpds(to_insert=to_insert, total_rows=total_rows)

        # Roll the changed transactions up into their parent awards
        with timer('updating awards to reflect their latest associated transaction info', logger.info):
            update_awards(tuple(award_update_id_list))
        with timer('updating contract-specific awards to reflect their latest transaction info', logger.info):
            update_contract_awards(tuple(award_update_id_list))
        with timer('updating award category variables', logger.info):
            update_award_categories(tuple(award_update_id_list))
    else:
        logger.info('Nothing to insert...')

    # Update the date for the last time the data load was run
    ExternalDataLoadDate.objects.filter(
        external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds']
    ).delete()
    ExternalDataLoadDate(
        last_load_date=start_date,
        external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds'],
    ).save()

    logger.info('FPDS NIGHTLY UPDATE FINISHED!')
def handle(self, *args, **options):
    """Nightly FPDS load using the shared last-load-date bookkeeping helpers."""
    logger.info("==== Starting FPDS nightly data load ====")

    # Explicit --date wins; otherwise resume from the recorded last load,
    # defaulting to one day ago (UTC) when nothing has been recorded yet.
    if options.get("date"):
        date = options.get("date")[0]
        date = datetime.strptime(date, "%Y-%m-%d").date()
    else:
        default_last_load_date = datetime.now(timezone.utc) - timedelta(days=1)
        date = get_last_load_date("fpds", default=default_last_load_date).date()
    # Captured before any work so the next run re-covers this window
    processing_start_datetime = datetime.now(timezone.utc)

    logger.info("Processing data for FPDS starting from %s" % date)

    with timer("retrieval of deleted FPDS IDs", logger.info):
        ids_to_delete = self.get_deleted_fpds_data_from_s3(date=date)

    if ids_to_delete:
        with timer("deletion of all stale FPDS data", logger.info):
            self.delete_stale_fpds(ids_to_delete=ids_to_delete)
    else:
        logger.info("No FPDS records to delete at this juncture")

    with timer("retrieval of new/modified FPDS data ID list", logger.info):
        total_insert = self.get_fpds_transaction_ids(date=date)

    if total_insert:
        # Add FPDS records
        with timer("insertion of new FPDS data in batches", logger.info):
            self.insert_all_new_fpds(total_insert)

        # Update Awards based on changed FPDS records
        with timer("updating awards to reflect their latest associated transaction info", logger.info):
            update_awards(tuple(AWARD_UPDATE_ID_LIST))

        # Update FPDS-specific Awards based on the info in child transactions
        with timer("updating contract-specific awards to reflect their latest transaction info", logger.info):
            update_contract_awards(tuple(AWARD_UPDATE_ID_LIST))

        # Update AwardCategories based on changed FPDS records
        with timer("updating award category variables", logger.info):
            update_award_categories(tuple(AWARD_UPDATE_ID_LIST))

        # Check the linkages from file C to FPDS records and update any that are missing
        with timer("updating C->D linkages", logger.info):
            update_c_to_d_linkages("contract")
    else:
        logger.info("No FPDS records to insert or modify at this juncture")

    # Update the date for the last time the data load was run
    update_last_load_date("fpds", processing_start_datetime)

    logger.info("FPDS NIGHTLY UPDATE COMPLETE")
def test_award_update_contract_executive_comp():
    """Test executive comp is loaded correctly awards from txn contract."""
    award = mommy.make("awards.Award")
    txn = mommy.make("awards.TransactionNormalized", award=award, action_date="2011-10-01")
    txn2 = mommy.make("awards.TransactionNormalized", award=award, action_date="2012-10-01")

    # Older transaction's officer data
    early_officers = {
        "officer_1_name": "Professor Plum", "officer_1_amount": 1,
        "officer_2_name": "Mrs. White", "officer_2_amount": 2,
        "officer_3_name": "Mrs. Peacock", "officer_3_amount": 3,
        "officer_4_name": "Mr. Green", "officer_4_amount": 4,
        "officer_5_name": "Colonel Mustard", "officer_5_amount": 5,
    }
    mommy.make("awards.TransactionFPDS", transaction=txn, **early_officers)

    # Newer transaction's officer data — this should win
    late_officers = {
        "officer_1_name": "Jack Mustard", "officer_1_amount": 100,
        "officer_2_name": "Jacob Green", "officer_2_amount": 200,
        "officer_3_name": "Diane White", "officer_3_amount": 300,
        "officer_4_name": "Kasandra Scarlet", "officer_4_amount": 400,
        "officer_5_name": "Victor Plum", "officer_5_amount": 500,
    }
    mommy.make("awards.TransactionFPDS", transaction=txn2, **late_officers)

    update_contract_awards()
    award.refresh_from_db()

    # The award carries the most recent transaction's executive comp
    assert award.officer_1_name == "Jack Mustard"
    assert award.officer_5_amount == 500

    # Test that a newer transaction without Executive Comp data doesn't overwrite the award values
    txn3 = mommy.make("awards.TransactionNormalized", award=award, action_date="2013-10-01")
    mommy.make("awards.TransactionFPDS", transaction=txn3)

    update_contract_awards()
    award.refresh_from_db()

    assert award.officer_1_name == "Jack Mustard"
    assert award.officer_5_amount == 500
def handle(self, *args, **options):
    """Historical D1/D2 load from the broker, followed by award rollup refreshes."""
    logger.info("Starting historical data load...")

    db_cursor = connections["data_broker"].cursor()
    fiscal_year = options.get("fiscal_year")
    page = options.get("page")
    limit = options.get("limit")

    if fiscal_year:
        fiscal_year = fiscal_year[0]
        logger.info("Processing data for Fiscal Year " + str(fiscal_year))
    else:
        fiscal_year = 2017

    # Pagination defaults when not supplied on the command line
    page = page[0] if page else 1
    limit = limit[0] if limit else 500000

    # --assistance skips D1; --contracts skips D2
    if not options["assistance"]:
        with timer("D1 historical data load", logger.info):
            self.update_transaction_contract(db_cursor=db_cursor, fiscal_year=fiscal_year, page=page, limit=limit)
    if not options["contracts"]:
        with timer("D2 historical data load", logger.info):
            self.update_transaction_assistance(db_cursor=db_cursor, fiscal_year=fiscal_year, page=page, limit=limit)

    # Roll the loaded transactions up into their parent awards
    with timer("updating awards to reflect their latest associated transaction info", logger.info):
        update_awards(tuple(award_update_id_list))
    with timer("updating assistance-specific awards to reflect their latest transaction info", logger.info):
        update_assistance_awards(tuple(award_assistance_update_id_list))
    with timer("updating contract-specific awards to reflect their latest transaction info", logger.info):
        update_contract_awards(tuple(award_contract_update_id_list))

    # Done!
    logger.info("FINISHED")
def test_award_update_from_contract_transaction():
    """Test award updates specific to contract transactions."""
    # For contract type transactions, the potential_total_value_of_award field
    # should update the corresponding field on the award table.
    award = mommy.make('awards.Award')
    txn = mommy.make('awards.Transaction', award=award)
    mommy.make('awards.TransactionContract', transaction=txn, potential_total_value_of_award=1000)

    update_contract_awards()
    award.refresh_from_db()

    assert award.potential_total_value_of_award == 1000
def handle(self, *args, **options):
    """Historical D1/D2 load from the broker, then refresh award rollups and categories."""
    logger.info('Starting historical data load...')

    db_cursor = connections['data_broker'].cursor()
    fiscal_year = options.get('fiscal_year')
    page = options.get('page')
    limit = options.get('limit')

    if fiscal_year:
        fiscal_year = fiscal_year[0]
        logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
    else:
        fiscal_year = 2017

    # Pagination defaults when not supplied on the command line
    page = page[0] if page else 1
    limit = limit[0] if limit else 500000

    # --assistance skips D1; --contracts skips D2
    if not options['assistance']:
        with timer('D1 historical data load', logger.info):
            self.update_transaction_contract(db_cursor=db_cursor, fiscal_year=fiscal_year, page=page, limit=limit)
    if not options['contracts']:
        with timer('D2 historical data load', logger.info):
            self.update_transaction_assistance(db_cursor=db_cursor, fiscal_year=fiscal_year, page=page, limit=limit)

    # Roll the loaded transactions up into their parent awards
    with timer('updating awards to reflect their latest associated transaction info', logger.info):
        update_awards(tuple(award_update_id_list))
    with timer('updating contract-specific awards to reflect their latest transaction info', logger.info):
        update_contract_awards(tuple(award_contract_update_id_list))
    with timer('updating award category variables', logger.info):
        update_award_categories(tuple(award_update_id_list))

    # Done!
    logger.info('FINISHED')
def handle(self, *args, **options):
    """Refresh award rollups, either for every award (--all) or for a single fiscal year.

    Without --all, collects the award ids touched by transactions in the chosen
    fiscal year (defaulting to 2017) and limits the updates to those awards.
    """
    logger.info('Starting updates to award data...')

    all_records_flag = options.get('all')
    fiscal_year = options.get('fiscal_year')

    award_update_id_list = []
    award_contract_update_id_list = []

    if not all_records_flag:
        if fiscal_year:
            fiscal_year = fiscal_year[0]
            logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
        else:
            fiscal_year = 2017

        # Lists to store for update_awards and update_contract_awards
        award_update_id_list = TransactionNormalized.objects.filter(
            action_date__fy=fiscal_year
        ).values_list('award_id', flat=True)
        award_contract_update_id_list = TransactionFPDS.objects.filter(
            action_date__fy=fiscal_year
        ).values_list('transaction__award_id', flat=True)

    # Fixed idiom: the original invoked these via bare conditional expressions
    # (`f() if flag else f(args)`), which hides the side effect; explicit
    # if/else branches match the style already used for update_contract_awards.
    with timer('updating awards to reflect their latest associated transaction info', logger.info):
        if all_records_flag:
            update_awards()
        else:
            update_awards(tuple(award_update_id_list))

    with timer('updating contract-specific awards to reflect their latest transaction info...', logger.info):
        if all_records_flag:
            update_contract_awards()
        else:
            update_contract_awards(tuple(award_contract_update_id_list))

    with timer('updating award category variables', logger.info):
        if all_records_flag:
            update_award_categories()
        else:
            update_award_categories(tuple(award_update_id_list))

    # Done!
    logger.info('FINISHED')
def test_award_update_from_contract_transaction():
    """Test award updates specific to contract transactions."""
    # For contract type transactions, base_and_all_options_value on the award
    # should be the sum of the values across its child transactions.
    award = mommy.make('awards.Award')
    for value in (1000, 1001):
        txn = mommy.make('awards.TransactionNormalized', award=award)
        mommy.make('awards.TransactionFPDS', transaction=txn, base_and_all_options_value=value)

    update_contract_awards()
    award.refresh_from_db()

    assert award.base_and_all_options_value == 2001
def test_award_update_from_contract_transaction():
    """Test award updates specific to contract transactions."""
    # For contract type transactions, base_and_all_options_value and
    # base_exercised_options_val on the award should each be the sum of the
    # corresponding values across its child transactions.
    award = mommy.make("awards.Award")
    for all_options, exercised in ((1000, 100), (1001, 101)):
        txn = mommy.make("awards.TransactionNormalized", award=award)
        mommy.make(
            "awards.TransactionFPDS",
            transaction=txn,
            base_and_all_options_value=all_options,
            base_exercised_options_val=exercised,
        )

    update_contract_awards()
    award.refresh_from_db()

    assert award.base_and_all_options_value == 2001
    assert award.base_exercised_options_val == 201
def handle(self, *args, **options):
    """Historical D1/D2 load from the broker, then refresh award rollups and categories."""
    logger.info('Starting historical data load...')

    db_cursor = connections['data_broker'].cursor()
    fiscal_year = options.get('fiscal_year')
    page = options.get('page')
    limit = options.get('limit')

    if fiscal_year:
        fiscal_year = fiscal_year[0]
        logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
    else:
        fiscal_year = 2017

    # Pagination defaults when not supplied on the command line
    page = page[0] if page else 1
    limit = limit[0] if limit else 500000

    # --assistance skips D1; --contracts skips D2
    if not options['assistance']:
        with timer('D1 historical data load', logger.info):
            self.update_transaction_contract(db_cursor=db_cursor, fiscal_year=fiscal_year, page=page, limit=limit)
    if not options['contracts']:
        with timer('D2 historical data load', logger.info):
            self.update_transaction_assistance(db_cursor=db_cursor, fiscal_year=fiscal_year, page=page, limit=limit)

    # Roll the loaded transactions up into their parent awards
    with timer('updating awards to reflect their latest associated transaction info', logger.info):
        update_awards(tuple(award_update_id_list))
    with timer('updating contract-specific awards to reflect their latest transaction info', logger.info):
        update_contract_awards(tuple(award_contract_update_id_list))
    with timer('updating award category variables', logger.info):
        update_award_categories(tuple(award_update_id_list))

    # Done!
    logger.info('FINISHED')
def handle(self, *args, **options):
    """Refresh award rollups, either for every award (--all) or for a single fiscal year.

    Without --all, collects the award ids touched by transactions in the chosen
    fiscal year (defaulting to 2017) and limits the updates to those awards.
    """
    logger.info('Starting updates to award data...')

    all_records_flag = options.get('all')
    fiscal_year = options.get('fiscal_year')

    award_update_id_list = []
    award_contract_update_id_list = []

    if not all_records_flag:
        if fiscal_year:
            fiscal_year = fiscal_year[0]
            logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
        else:
            fiscal_year = 2017

        # Lists to store for update_awards and update_contract_awards
        award_update_id_list = TransactionNormalized.objects.filter(
            action_date__fy=fiscal_year
        ).values_list('award_id', flat=True)
        award_contract_update_id_list = TransactionFPDS.objects.filter(
            action_date__fy=fiscal_year
        ).values_list('transaction__award_id', flat=True)

    # Fixed idiom: the original invoked these via bare conditional expressions
    # (`f() if flag else f(args)`), which hides the side effect; explicit
    # if/else branches match the style already used for update_contract_awards.
    with timer('updating awards to reflect their latest associated transaction info', logger.info):
        if all_records_flag:
            update_awards()
        else:
            update_awards(tuple(award_update_id_list))

    with timer('updating contract-specific awards to reflect their latest transaction info...', logger.info):
        if all_records_flag:
            update_contract_awards()
        else:
            update_contract_awards(tuple(award_contract_update_id_list))

    with timer('updating award category variables', logger.info):
        if all_records_flag:
            update_award_categories()
        else:
            update_award_categories(tuple(award_update_id_list))

    # Done!
    logger.info('FINISHED')
def handle(self, *args, **options):
    """Load a single broker submission (files A, B, D1, D2, then C) into the data store.

    Uses the real broker database unless --test is given, in which case a
    PhonyCursor stands in. After all files load, runs the global post-load
    cleanup (description fields, award rollups, contract-specific fields).
    """
    awards_cache.clear()

    # Grab the data broker database connections
    if not options['test']:
        try:
            db_conn = connections['data_broker']
            db_cursor = db_conn.cursor()
        except Exception as err:
            logger.critical('Could not connect to database. Is DATA_BROKER_DATABASE_URL set?')
            # BUG FIX: was `logger.critical(print(err))`, which printed the error
            # to stdout and logged None (print's return value); log the error itself.
            logger.critical(err)
            return
    else:
        db_cursor = PhonyCursor()

    # Grab the submission id
    submission_id = options['submission_id'][0]

    # Verify the ID exists in the database, and grab the data
    db_cursor.execute('SELECT * FROM submission WHERE submission_id = %s', [submission_id])
    submission_data = dictfetchall(db_cursor)

    if len(submission_data) == 0:
        logger.error('Could not find submission with id ' + str(submission_id))
        return
    elif len(submission_data) > 1:
        logger.error('Found multiple submissions with id ' + str(submission_id))
        return

    # We have a single submission, which is what we want
    submission_data = submission_data[0]
    broker_submission_id = submission_data['submission_id']
    del submission_data['submission_id']  # To avoid collisions with the newer PK system
    submission_attributes = get_submission_attributes(broker_submission_id, submission_data)

    # Move on, and grab file A data
    db_cursor.execute('SELECT * FROM appropriation WHERE submission_id = %s', [submission_id])
    appropriation_data = dictfetchall(db_cursor)
    logger.info('Acquired appropriation data for ' + str(submission_id) + ', there are ' +
                str(len(appropriation_data)) + ' rows.')
    load_file_a(submission_attributes, appropriation_data, db_cursor)

    # Let's get File B information
    db_cursor.execute('SELECT * FROM object_class_program_activity WHERE submission_id = %s', [submission_id])
    prg_act_obj_cls_data = dictfetchall(db_cursor)
    logger.info('Acquired program activity object class data for ' + str(submission_id) + ', there are ' +
                str(len(prg_act_obj_cls_data)) + ' rows.')
    load_file_b(submission_attributes, prg_act_obj_cls_data, db_cursor)

    # File D2
    db_cursor.execute('SELECT * FROM award_financial_assistance WHERE submission_id = %s', [submission_id])
    award_financial_assistance_data = dictfetchall(db_cursor)
    logger.info('Acquired award financial assistance data for ' + str(submission_id) + ', there are ' +
                str(len(award_financial_assistance_data)) + ' rows.')
    load_file_d2(submission_attributes, award_financial_assistance_data, db_cursor)

    # File D1
    db_cursor.execute('SELECT * FROM award_procurement WHERE submission_id = %s', [submission_id])
    procurement_data = dictfetchall(db_cursor)
    logger.info('Acquired award procurement data for ' + str(submission_id) + ', there are ' +
                str(len(procurement_data)) + ' rows.')
    load_file_d1(submission_attributes, procurement_data, db_cursor)

    # Let's get File C information
    # Note: we load File C last, because the D1 and D2 files have the awarding
    # agency top tier (CGAC) and sub tier data needed to look up/create
    # the most specific possible corresponding award. When looking up/
    # creating awards for File C, we dont have sub-tier agency info, so
    # we'll do our best to match them to the more specific award records
    # already created by the D file load
    db_cursor.execute('SELECT * FROM award_financial WHERE submission_id = %s', [submission_id])
    award_financial_data = dictfetchall(db_cursor)
    logger.info('Acquired award financial data for ' + str(submission_id) + ', there are ' +
                str(len(award_financial_data)) + ' rows.')
    load_file_c(submission_attributes, award_financial_data, db_cursor)

    # Once all the files have been processed, run any global
    # cleanup/post-load tasks.
    # 1. Update the descriptions TODO: If this is slow, add ID limiting as above
    update_model_description_fields()
    # 2. Update awards to reflect their latest associated txn info
    update_awards(tuple(AWARD_UPDATE_ID_LIST))
    # 3. Update contract-specific award fields to reflect latest txn info
    update_contract_awards(tuple(AWARD_CONTRACT_UPDATE_ID_LIST))
def handle(self, *args, **options):
    """Load contract transactions from a legacy USAspending CSV file.

    Reads each row, builds Transaction and TransactionContract instances,
    bulk-inserts them, then refreshes the affected awards.
    """
    csv_file = options['file'][0]
    self.logger.info("Starting load for file {}".format(csv_file))

    # Create the csv reader
    reader = CsvDataReader(csv_file)

    # Create a new submission attributes object for this timestamp
    subattr = SubmissionAttributes()
    subattr.usaspending_update = datetime.now()
    subattr.save()

    # Create lists to hold model instances for bulk insert
    txn_list = []
    txn_contract_list = []

    subtier_agency_dict = h.get_subtier_agency_dict()

    # Store some additional support data needed for the load
    award_type_dict = {a[0]: a[1] for a in AWARD_TYPES}
    contract_pricing_dict = {c[0]: c[1] for c in CONTRACT_PRICING_TYPES}

    for idx, row in enumerate(reader):
        # BUG FIX: progress logging previously tested/printed `len(reader)`,
        # which is constant — it logged every row (or never) instead of every
        # 1000th. Use the loop index for periodic progress.
        if idx % 1000 == 0:
            self.logger.info("Read row {}".format(idx))
        row = h.cleanse_values(row)

        awarding_agency_id = self.get_agency_id(row["contractingofficeagencyid"], subtier_agency_dict)

        # Hoisted: evaluate_contract_award_type was called twice per row
        award_type = evaluate_contract_award_type(row)

        # Create the transaction object for this row
        txn_dict = {
            "action_date": h.convert_date(row['signeddate']),
            "award": self.get_or_create_award(row, awarding_agency_id),
            "awarding_agency_id": awarding_agency_id,
            "data_source": "USA",
            "description": row["descriptionofcontractrequirement"],
            "federal_action_obligation": row["dollarsobligated"],
            "funding_agency_id": self.get_agency_id(row["fundingrequestingagencyid"], subtier_agency_dict),
            "last_modified_date": h.convert_date(row['last_modified_date']),
            "modification_number": row["modnumber"],
            "place_of_performance": h.get_or_create_location(row, mapper=location_mapper_place_of_performance),
            "period_of_performance_current_end_date": h.convert_date(row['currentcompletiondate']),
            "period_of_performance_start_date": h.convert_date(row['effectivedate']),
            "recipient": self.get_or_create_recipient(row),
            "submission": subattr,
            "type": award_type,
            "type_description": award_type_dict.get(award_type),
            "usaspending_unique_transaction_id": row["unique_transaction_id"],
        }
        txn = Transaction(**txn_dict)
        txn_list.append(txn)

        # Create the transaction contract object for this row
        txn_contract_dict = {
            "submission": subattr,
            "piid": row['piid'],
            "parent_award_id": row['idvpiid'],
            "current_total_value_award": h.parse_numeric_value(row["baseandexercisedoptionsvalue"]),
            "period_of_performance_potential_end_date": h.convert_date(row['ultimatecompletiondate']),
            "potential_total_value_of_award": h.parse_numeric_value(row["baseandalloptionsvalue"]),
            "epa_designated_product": self.parse_first_character(row['useofepadesignatedproducts']),
            "gfe_gfp": h.up2colon(row['gfe_gfp']),
            "cost_or_pricing_data": h.up2colon(row['costorpricingdata']),
            "type_of_contract_pricing": h.up2colon(row['typeofcontractpricing']),
            "type_of_contract_pricing_description": contract_pricing_dict.get(h.up2colon(row['typeofcontractpricing'])),
            "multiple_or_single_award_idv": h.up2colon(row['multipleorsingleawardidc']),
            "naics": h.up2colon(row['nationalinterestactioncode']),
            "dod_claimant_program_code": h.up2colon(row['claimantprogramcode']),
            "commercial_item_acquisition_procedures": h.up2colon(row['commercialitemacquisitionprocedures']),
            "commercial_item_test_program": h.up2colon(row['commercialitemtestprogram']),
            "consolidated_contract": h.up2colon(row['consolidatedcontract']),
            "contingency_humanitarian_or_peacekeeping_operation": h.up2colon(
                row['contingencyhumanitarianpeacekeepingoperation']),
            "contract_bundling": h.up2colon(row['contractbundling']),
            "contract_financing": h.up2colon(row['contractfinancing']),
            "contracting_officers_determination_of_business_size": h.up2colon(
                row['contractingofficerbusinesssizedetermination']),
            "country_of_product_or_service_origin": h.up2colon(row['countryoforigin']),
            "davis_bacon_act": h.up2colon(row['davisbaconact']),
            "evaluated_preference": h.up2colon(row['evaluatedpreference']),
            "extent_competed": h.up2colon(row['extentcompeted']),
            "information_technology_commercial_item_category": h.up2colon(
                row['informationtechnologycommercialitemcategory']),
            "interagency_contracting_authority": h.up2colon(row['interagencycontractingauthority']),
            "local_area_set_aside": h.up2colon(row['localareasetaside']),
            "purchase_card_as_payment_method": h.up2colon(row['purchasecardaspaymentmethod']),
            "multi_year_contract": h.up2colon(row['multiyearcontract']),
            "national_interest_action": h.up2colon(row['nationalinterestactioncode']),
            "number_of_actions": h.up2colon(row['numberofactions']),
            "number_of_offers_received": h.up2colon(row['numberofoffersreceived']),
            "performance_based_service_acquisition": h.up2colon(row['performancebasedservicecontract']),
            "place_of_manufacture": h.up2colon(row['placeofmanufacture']),
            "product_or_service_code": h.up2colon(row['productorservicecode']),
            "recovered_materials_sustainability": h.up2colon(row['recoveredmaterialclauses']),
            "research": h.up2colon(row['research']),
            "sea_transportation": h.up2colon(row['seatransportation']),
            "service_contract_act": h.up2colon(row['servicecontractact']),
            "small_business_competitiveness_demonstration_program": self.parse_first_character(
                row['smallbusinesscompetitivenessdemonstrationprogram']),
            "solicitation_procedures": h.up2colon(row['solicitationprocedures']),
            "subcontracting_plan": h.up2colon(row['subcontractplan']),
            "type_set_aside": h.up2colon(row['typeofsetaside']),
            "walsh_healey_act": h.up2colon(row['walshhealyact']),
            "rec_flag": self.parse_first_character(h.up2colon(row['rec_flag'])),
            "type_of_idc": self.parse_first_character(row['typeofidc']),
            "a76_fair_act_action": self.parse_first_character(row['a76action']),
            "clinger_cohen_act_planning": self.parse_first_character(row['clingercohenact']),
            "cost_accounting_standards": self.parse_first_character(row['costaccountingstandardsclause']),
            "fed_biz_opps": self.parse_first_character(row['fedbizopps']),
            "foreign_funding": self.parse_first_character(row['fundedbyforeignentity']),
            "major_program": self.parse_first_character(row['majorprogramcode']),
            "program_acronym": self.parse_first_character(row['programacronym']),
            "referenced_idv_modification_number": self.parse_first_character(row['idvmodificationnumber']),
            "transaction_number": self.parse_first_character(row['transactionnumber']),
            "solicitation_identifier": self.parse_first_character(row['solicitationid']),
        }
        txn_contract = TransactionContract(**txn_contract_dict)
        txn_contract_list.append(txn_contract)

    # Bulk insert transaction rows
    self.logger.info("Starting Transaction bulk insert ({} records)".format(len(txn_list)))
    Transaction.objects.bulk_create(txn_list)
    self.logger.info("Completed Transaction bulk insert")

    # Update txn contract list with newly-inserted transactions
    award_id_list = []  # we'll need this when updating the awards later on
    for i, t in enumerate(txn_contract_list):
        # add transaction info to this TransactionContract object
        t.transaction = txn_list[i]
        # add the corresponding award id to a list we'll use when batch-updating award data
        award_id_list.append(txn_list[i].award_id)

    # Bulk insert transaction contract rows
    self.logger.info("Starting TransactionContract bulk insert ({} records)".format(len(txn_contract_list)))
    TransactionContract.objects.bulk_create(txn_contract_list)
    self.logger.info("Completed TransactionContract bulk insert")

    # Update awards to reflect latest transaction information
    # (note that this can't be done via signals or a save()
    # override in the model itself, because those aren't
    # triggered by a bulk update
    self.logger.info("Starting Awards update")
    count = update_awards(tuple(award_id_list))
    update_contract_awards(tuple(award_id_list))
    self.logger.info("Completed Awards update ({} records)".format(count))
def handle(self, *args, **options):
    """Entry point for the FPDS bulk load command.

    Diffs Broker FPDS data against the USAspending data store for one
    fiscal year, deletes stale rows, then bulk-loads every dependent
    table (locations, legal entities, awards, transactions) and finally
    refreshes the derived award fields.
    """
    logger.info('Starting FPDS bulk data load...')

    broker_cursor = connections['data_broker'].cursor()
    spending_cursor = connection.cursor()

    # --fiscal_year arrives as a single-item list from argparse; default to 2017.
    fy_option = options.get('fiscal_year')
    fiscal_year = fy_option[0] if fy_option else 2017
    logger.info('Processing data for Fiscal Year ' + str(fiscal_year))

    with timer('Diff-ing FPDS data', logger.info):
        to_insert, to_delete = self.diff_fpds_data(
            db_cursor=broker_cursor, ds_cursor=spending_cursor, fiscal_year=fiscal_year)

    insert_count = len(to_insert)
    delete_count = len(to_delete)

    if delete_count > 0:
        with timer('Deleting stale FPDS data', logger.info):
            self.delete_stale_fpds(to_delete=to_delete)

    if insert_count > 0:
        # Build lookup maps only after deletions so we pick up the latest rows.
        self.set_lookup_maps()

        with timer('Get Broker FPDS data', logger.info):
            fpds_broker_data = self.get_fpds_data(
                db_cursor=broker_cursor, fiscal_year=fiscal_year, to_insert=to_insert)

        # Each loader streams the same broker payload into a different table.
        with timer('Loading POP Location data', logger.info):
            self.load_locations(fpds_broker_data=fpds_broker_data, total_rows=insert_count, pop_flag=True)
        with timer('Loading LE Location data', logger.info):
            self.load_locations(fpds_broker_data=fpds_broker_data, total_rows=insert_count)
        with timer('Loading Legal Entity data', logger.info):
            self.load_legal_entity(fpds_broker_data=fpds_broker_data, total_rows=insert_count)
        with timer('Loading Parent Award data', logger.info):
            self.load_parent_awards(fpds_broker_data=fpds_broker_data, total_rows=insert_count)
        with timer('Loading Award data', logger.info):
            self.load_awards(fpds_broker_data=fpds_broker_data, total_rows=insert_count)
        with timer('Loading Transaction Normalized data', logger.info):
            self.load_transaction_normalized(fpds_broker_data=fpds_broker_data, total_rows=insert_count)
        with timer('Loading Transaction FPDS data', logger.info):
            self.load_transaction_fpds(fpds_broker_data=fpds_broker_data, total_rows=insert_count)

        # NOTE(review): `award_lookup` is not defined anywhere in this method —
        # presumably a module-level (or loader-populated) collection of awards;
        # confirm it is in scope before running.
        award_update_id_list = [award.id for award in award_lookup]

        with timer('Updating awards to reflect their latest associated transaction info', logger.info):
            update_awards(tuple(award_update_id_list))
        with timer('Updating contract-specific awards to reflect their latest transaction info', logger.info):
            update_contract_awards(tuple(award_update_id_list))
        with timer('Updating award category variables', logger.info):
            update_award_categories(tuple(award_update_id_list))
    else:
        logger.info('Nothing to insert...FINISHED!')
def handle(self, *args, **options):
    """Load a legacy USAspending financial-assistance CSV file.

    Reads each row, builds Transaction + TransactionAssistance model
    instances, bulk-inserts both lists, then refreshes the derived award
    fields (bulk_create bypasses save()/signals, so the award rollups
    must be recomputed explicitly at the end).

    Expects ``options['file']`` to be a single-item list containing the
    CSV path (argparse nargs convention).
    """
    h.clear_caches()
    csv_file = options['file'][0]
    self.logger.info("Starting load for file {}".format(csv_file))

    # Create the csv reader
    reader = CsvDataReader(csv_file)

    # Create a new submission attributes object for this timestamp
    subattr = SubmissionAttributes()
    subattr.usaspending_update = datetime.now()
    subattr.save()

    # Create lists to hold model instances for bulk insert
    txn_list = []
    txn_assistance_list = []

    for idx, row in enumerate(reader):
        # BUG FIX: previously this tested ``len(reader) % 1000 == 0`` and
        # logged ``len(reader)`` — both loop-invariant, so the message either
        # fired on every row or never, and never reported actual progress.
        # Use the row index to log every 1000th row instead.
        if idx % 1000 == 0:
            self.logger.info("Read row {}".format(idx))
        row = h.cleanse_values(row)
        awarding_agency = self.get_awarding_agency(row)  # todo: use agency dict?

        # Create the transaction object for this row
        txn_dict = {
            "submission": subattr,
            "action_date": h.convert_date(row['obligation_action_date']),
            "action_type": h.up2colon(row['action_type']),
            "award": self.get_or_create_award(row, awarding_agency=awarding_agency),
            "awarding_agency": awarding_agency,
            "description": row["project_description"],  # ?? account_title is anther contender?
            "data_source": "USA",
            "federal_action_obligation": row["fed_funding_amount"],
            "last_modified_date": h.convert_date(row['last_modified_date']),
            "modification_number": row["federal_award_mod"],  # ??
            "period_of_performance_start_date": h.convert_date(row['starting_date']),
            "period_of_performance_current_end_date": h.convert_date(row['ending_date']),
            "place_of_performance": h.get_or_create_location(
                row, location_mapper_fin_assistance_principal_place),
            "recipient": self.get_or_create_recipient(row),
            "type": h.up2colon(row['assistance_type']),
            "usaspending_unique_transaction_id": row["unique_transaction_id"],
            # ??"funding_agency_id":
            # ?? "certified date":
        }
        txn = Transaction(**txn_dict)
        txn.fiscal_year = fy(txn.action_date)
        txn_list.append(txn)

        # Create the transaction assistance object for this row
        txn_assistance_dict = {
            "submission": subattr,
            "fain": row["federal_award_id"],
            "uri": row["uri"],
            "cfda": Cfda.objects.filter(
                program_number=row["cfda_program_num"]).first(),
            "correction_late_delete_indicator": h.up2colon(row['correction_late_ind']),
            "face_value_loan_guarantee": row["face_loan_guran"],
            "fiscal_year_and_quarter_correction": row["fyq_correction"],
            "non_federal_funding_amount": row["non_fed_funding_amount"],
            "original_loan_subsidy_cost": row["orig_sub_guran"],  # ??
            "record_type": int(h.up2colon(row['record_type'])),
            "sai_number": row["sai_number"],
            "submitted_type": "C",  # ?? For CSV?
        }
        # ?? business_funds_indicator
        # ?? reporting period start/end??
        txn_assistance = TransactionAssistance(**txn_assistance_dict)
        txn_assistance_list.append(txn_assistance)

    # Bulk insert transaction rows
    self.logger.info(
        "Starting Transaction bulk insert ({} records)".format(
            len(txn_list)))
    Transaction.objects.bulk_create(txn_list)
    self.logger.info("Completed Transaction bulk insert")

    # Update txn assistance list with newly-inserted transactions
    award_id_list = []  # we'll need this when updating the awards later on
    for idx, t in enumerate(txn_assistance_list):
        t.transaction = txn_list[idx]
        award_id_list.append(txn_list[idx].award_id)

    # Bulk insert transaction assistance rows
    self.logger.info(
        "Starting TransactionAssistance bulk insert ({} records)".format(
            len(txn_assistance_list)))
    TransactionAssistance.objects.bulk_create(txn_assistance_list)
    self.logger.info("Completed TransactionAssistance bulk insert")

    # Update awards to reflect latest transaction information
    # (note that this can't be done via signals or a save()
    # override in the model itself, because those aren't
    # triggered by a bulk update
    self.logger.info("Starting Awards update")
    count = update_awards(tuple(award_id_list))
    update_contract_awards(tuple(award_id_list))
    update_model_description_fields()
    self.logger.info("Completed Awards update ({} records)".format(count))
def handle(self, *args, **options):
    """Run the FPDS bulk data load for a single fiscal year.

    Pipeline: diff Broker vs. USAspending, delete stale rows, bulk-load
    all dependent tables from the broker payload, then recompute the
    derived award fields.
    """
    logger.info('Starting FPDS bulk data load...')

    db_cursor = connections['data_broker'].cursor()
    ds_cursor = connection.cursor()

    # argparse hands the year over as a one-element list; fall back to 2017.
    fiscal_year = (options.get('fiscal_year') or [2017])[0]
    logger.info('Processing data for Fiscal Year ' + str(fiscal_year))

    with timer('Diff-ing FPDS data', logger.info):
        to_insert, to_delete = self.diff_fpds_data(
            db_cursor=db_cursor, ds_cursor=ds_cursor, fiscal_year=fiscal_year)

    if len(to_delete) > 0:
        with timer('Deleting stale FPDS data', logger.info):
            self.delete_stale_fpds(to_delete=to_delete)

    # Guard clause: nothing new means we're done after the deletions.
    if len(to_insert) == 0:
        logger.info('Nothing to insert...FINISHED!')
        return

    total_rows = len(to_insert)

    # Set lookups after deletions to only get latest
    self.set_lookup_maps()

    with timer('Get Broker FPDS data', logger.info):
        fpds_broker_data = self.get_fpds_data(
            db_cursor=db_cursor, fiscal_year=fiscal_year, to_insert=to_insert)

    # Table-by-table loaders, each timed under its own label.
    load_steps = (
        ('Loading POP Location data',
         lambda: self.load_locations(fpds_broker_data=fpds_broker_data, total_rows=total_rows, pop_flag=True)),
        ('Loading LE Location data',
         lambda: self.load_locations(fpds_broker_data=fpds_broker_data, total_rows=total_rows)),
        ('Loading Legal Entity data',
         lambda: self.load_legal_entity(fpds_broker_data=fpds_broker_data, total_rows=total_rows)),
        ('Loading Parent Award data',
         lambda: self.load_parent_awards(fpds_broker_data=fpds_broker_data, total_rows=total_rows)),
        ('Loading Award data',
         lambda: self.load_awards(fpds_broker_data=fpds_broker_data, total_rows=total_rows)),
        ('Loading Transaction Normalized data',
         lambda: self.load_transaction_normalized(fpds_broker_data=fpds_broker_data, total_rows=total_rows)),
        ('Loading Transaction FPDS data',
         lambda: self.load_transaction_fpds(fpds_broker_data=fpds_broker_data, total_rows=total_rows)),
    )
    for label, step in load_steps:
        with timer(label, logger.info):
            step()

    # NOTE(review): `award_lookup` is not defined in this method — it looks
    # like a module-level or loader-populated collection of awards; verify
    # it is in scope.
    award_update_id_list = [award.id for award in award_lookup]

    # Derived-field refreshes, each timed under its own label.
    update_steps = (
        ('Updating awards to reflect their latest associated transaction info', update_awards),
        ('Updating contract-specific awards to reflect their latest transaction info', update_contract_awards),
        ('Updating award category variables', update_award_categories),
    )
    for label, updater in update_steps:
        with timer(label, logger.info):
            updater(tuple(award_update_id_list))