def load_file_a(submission_attributes, appropriation_data, db_cursor): """ Process and load file A broker data (aka TAS balances, aka appropriation account balances). """ reverse = re.compile("gross_outlay_amount_by_tas_cpe") # dictionary to capture TAS that were skipped and some metadata # tas = top-level key # count = number of rows skipped # rows = row numbers skipped, corresponding to the original row numbers in the file that was submitted skipped_tas = {} bulk_treasury_appropriation_account_tas_lookup(appropriation_data, db_cursor) # Create account objects save_manager = BulkCreateManager(AppropriationAccountBalances) for row in appropriation_data: # Check and see if there is an entry for this TAS treasury_account, tas_rendering_label = get_treasury_appropriation_account_tas_lookup(row.get("tas_id")) if treasury_account is None: update_skipped_tas(row, tas_rendering_label, skipped_tas) continue # Now that we have the account, we can load the appropriation balances # TODO: Figure out how we want to determine what row is overridden by what row # If we want to correlate, the following attributes are available in the data broker data that might be useful: # appropriation_id, row_number appropriation_balances = something something get appropriation balances... appropriation_balances = AppropriationAccountBalances() value_map = { "treasury_account_identifier": treasury_account, "submission": submission_attributes, "reporting_period_start": submission_attributes.reporting_period_start, "reporting_period_end": submission_attributes.reporting_period_end, } field_map = {} save_manager.append( load_data_into_model( appropriation_balances, row, field_map=field_map, value_map=value_map, save=False, reverse=reverse ) ) save_manager.save_stragglers() for key in skipped_tas: logger.info(f"Skipped {skipped_tas[key]['count']:,} rows due to missing TAS: {key}") total_tas_skipped = 0 for key in skipped_tas: total_tas_skipped += skipped_tas[key]["count"] logger.info(f"Skipped a total of {total_tas_skipped:,} TAS rows for File A")
def load_file_a(submission_attributes, appropriation_data, db_cursor): """ Process and load file A broker data (aka TAS balances, aka appropriation account balances). """ reverse = re.compile("gross_outlay_amount_by_tas_cpe") skipped_tas = defaultdict(int) # tracks count of rows skipped due to "missing" TAS bulk_treasury_appropriation_account_tas_lookup(appropriation_data, db_cursor) # Create account objects save_manager = BulkCreateManager(AppropriationAccountBalances) for row in appropriation_data: # Check and see if there is an entry for this TAS treasury_account, tas_rendering_label = get_treasury_appropriation_account_tas_lookup(row.get("tas_id")) if treasury_account is None: skipped_tas[tas_rendering_label] += 1 continue # Now that we have the account, we can load the appropriation balances # TODO: Figure out how we want to determine what row is overridden by what row # If we want to correlate, the following attributes are available in the data broker data that might be useful: # appropriation_id, row_number appropriation_balances = something something get appropriation balances... appropriation_balances = AppropriationAccountBalances() value_map = { "treasury_account_identifier": treasury_account, "submission": submission_attributes, "reporting_period_start": submission_attributes.reporting_period_start, "reporting_period_end": submission_attributes.reporting_period_end, } field_map = {} save_manager.append( load_data_into_model( appropriation_balances, row, field_map=field_map, value_map=value_map, save=False, reverse=reverse ) ) save_manager.save_stragglers() for tas, count in skipped_tas.items(): logger.info(f"Skipped {count:,} rows due to {tas}") total_tas_skipped = sum([count for count in skipped_tas.values()]) if total_tas_skipped > 0: logger.info(f"SKIPPED {total_tas_skipped:,} ROWS of File A (missing TAS)") else: logger.info("All File A records in Broker loaded into USAspending")
def load_file_c(submission_attributes, db_cursor, certified_award_financial): """ Process and load file C broker data. Note: this should run AFTER the D1 and D2 files are loaded because we try to join to those records to retrieve some additional information about the awarding sub-tier agency. """ # this matches the file b reverse directive, but am repeating it here to ensure that we don't overwrite it as we # change up the order of file loading if certified_award_financial.count == 0: logger.warning("No File C (award financial) data found, skipping...") return reverse = re.compile(r"(_(cpe|fyb)$)|^transaction_obligated_amount$") # dictionary to capture TAS that were skipped and some metadata # tas = top-level key # count = number of rows skipped # rows = row numbers skipped, corresponding to the original row numbers in the file that was submitted skipped_tas = {} total_rows = certified_award_financial.count start_time = datetime.now() bulk_treasury_appropriation_account_tas_lookup( certified_award_financial.tas_ids, db_cursor) _save_file_c_rows(certified_award_financial, total_rows, start_time, skipped_tas, submission_attributes, reverse) update_c_to_d_linkages("contract", False, submission_attributes.submission_id) update_c_to_d_linkages("assistance", False, submission_attributes.submission_id) for key in skipped_tas: logger.info( f"Skipped {skipped_tas[key]['count']:,} rows due to missing TAS: {key}" ) total_tas_skipped = 0 for key in skipped_tas: total_tas_skipped += skipped_tas[key]["count"] logger.info( f"Skipped a total of {total_tas_skipped:,} TAS rows for File C")
def load_file_c(submission_attributes, db_cursor, certified_award_financial): """ Process and load file C broker data. Note: this should run AFTER the D1 and D2 files are loaded because we try to join to those records to retrieve some additional information about the awarding sub-tier agency. """ if certified_award_financial.count == 0: logger.warning("No File C (award financial) data found, skipping...") return # this matches the file b reverse directive, but am repeating it here to ensure that we don't overwrite it as we # change up the order of file loading reverse = re.compile(r"(_(cpe|fyb)$)|^transaction_obligated_amount$") skipped_tas = defaultdict( int) # tracks count of rows skipped due to "missing" TAS total_rows = certified_award_financial.count start_time = datetime.now() bulk_treasury_appropriation_account_tas_lookup( certified_award_financial.tas_ids, db_cursor) _save_file_c_rows(certified_award_financial, total_rows, start_time, skipped_tas, submission_attributes, reverse) update_c_to_d_linkages("contract", False, submission_attributes.submission_id) update_c_to_d_linkages("assistance", False, submission_attributes.submission_id) for tas, count in skipped_tas.items(): logger.info(f"Skipped {count:,} rows due to {tas}") total_tas_skipped = sum([count for count in skipped_tas.values()]) if total_tas_skipped > 0: logger.info( f"SKIPPED {total_tas_skipped:,} ROWS of File C (missing TAS)") else: logger.info("All File C records in Broker loaded into USAspending")
def load_file_b(submission_attributes, prg_act_obj_cls_data, db_cursor): """ Process and load file B broker data (aka TAS balances by program activity and object class). """ reverse = re.compile(r"(_(cpe|fyb)$)|^transaction_obligated_amount$") # dictionary to capture TAS that were skipped and some metadata # tas = top-level key # count = number of rows skipped # rows = row numbers skipped, corresponding to the original row numbers in the file that was submitted skipped_tas = {} bulk_treasury_appropriation_account_tas_lookup(prg_act_obj_cls_data, db_cursor) save_manager = BulkCreateManager( FinancialAccountsByProgramActivityObjectClass) for row in prg_act_obj_cls_data: # Check and see if there is an entry for this TAS treasury_account, tas_rendering_label = get_treasury_appropriation_account_tas_lookup( row.get("tas_id")) if treasury_account is None: update_skipped_tas(row, tas_rendering_label, skipped_tas) continue # get the corresponding account balances row (aka "File A" record) account_balances = AppropriationAccountBalances.objects.get( treasury_account_identifier=treasury_account, submission_id=submission_attributes.submission_id) financial_by_prg_act_obj_cls = FinancialAccountsByProgramActivityObjectClass( ) value_map = { "submission": submission_attributes, "reporting_period_start": submission_attributes.reporting_period_start, "reporting_period_end": submission_attributes.reporting_period_end, "treasury_account": treasury_account, "appropriation_account_balances": account_balances, "object_class": get_object_class(row["object_class"], row["by_direct_reimbursable_fun"]), "program_activity": get_program_activity(row, submission_attributes), "disaster_emergency_fund": get_disaster_emergency_fund(row), } save_manager.append( load_data_into_model(financial_by_prg_act_obj_cls, row, value_map=value_map, save=False, reverse=reverse)) save_manager.save_stragglers() for key in skipped_tas: logger.info( f"Skipped {skipped_tas[key]['count']:,} rows due to missing TAS: {key}" ) total_tas_skipped = 0 for key in skipped_tas: total_tas_skipped += skipped_tas[key]["count"] logger.info( f"Skipped a total of {total_tas_skipped:,} TAS rows for File B")
def load_file_b(submission_attributes, prg_act_obj_cls_data, db_cursor): """ Process and load file B broker data (aka TAS balances by program activity and object class). """ reverse = re.compile(r"(_(cpe|fyb)$)|^transaction_obligated_amount$") skipped_tas = defaultdict( int) # tracks count of rows skipped due to "missing" TAS bulk_treasury_appropriation_account_tas_lookup(prg_act_obj_cls_data, db_cursor) save_manager = BulkCreateManager( FinancialAccountsByProgramActivityObjectClass) for row in prg_act_obj_cls_data: # Check and see if there is an entry for this TAS treasury_account, tas_rendering_label = get_treasury_appropriation_account_tas_lookup( row.get("tas_id")) if treasury_account is None: skipped_tas[tas_rendering_label] += 1 continue # get the corresponding account balances row (aka "File A" record) account_balances = AppropriationAccountBalances.objects.get( treasury_account_identifier=treasury_account, submission_id=submission_attributes.submission_id) financial_by_prg_act_obj_cls = FinancialAccountsByProgramActivityObjectClass( ) value_map = { "submission": submission_attributes, "reporting_period_start": submission_attributes.reporting_period_start, "reporting_period_end": submission_attributes.reporting_period_end, "treasury_account": treasury_account, "appropriation_account_balances": account_balances, "object_class": get_object_class(row["object_class"], row["by_direct_reimbursable_fun"]), "program_activity": get_program_activity(row, submission_attributes), "disaster_emergency_fund": get_disaster_emergency_fund(row), } save_manager.append( load_data_into_model(financial_by_prg_act_obj_cls, row, value_map=value_map, save=False, reverse=reverse)) save_manager.save_stragglers() for tas, count in skipped_tas.items(): logger.info(f"Skipped {count:,} rows due to {tas}") total_tas_skipped = sum([count for count in skipped_tas.values()]) if total_tas_skipped > 0: logger.info( f"SKIPPED {total_tas_skipped:,} ROWS of File B (missing TAS)") else: logger.info("All File B records in Broker loaded into USAspending")