def load_file_a(submission_attributes, appropriation_data, db_cursor): """ Process and load file A broker data (aka TAS balances, aka appropriation account balances). """ reverse = re.compile("gross_outlay_amount_by_tas_cpe") # dictionary to capture TAS that were skipped and some metadata # tas = top-level key # count = number of rows skipped # rows = row numbers skipped, corresponding to the original row numbers in the file that was submitted skipped_tas = {} bulk_treasury_appropriation_account_tas_lookup(appropriation_data, db_cursor) # Create account objects save_manager = BulkCreateManager(AppropriationAccountBalances) for row in appropriation_data: # Check and see if there is an entry for this TAS treasury_account, tas_rendering_label = get_treasury_appropriation_account_tas_lookup(row.get("tas_id")) if treasury_account is None: update_skipped_tas(row, tas_rendering_label, skipped_tas) continue # Now that we have the account, we can load the appropriation balances # TODO: Figure out how we want to determine what row is overridden by what row # If we want to correlate, the following attributes are available in the data broker data that might be useful: # appropriation_id, row_number appropriation_balances = something something get appropriation balances... appropriation_balances = AppropriationAccountBalances() value_map = { "treasury_account_identifier": treasury_account, "submission": submission_attributes, "reporting_period_start": submission_attributes.reporting_period_start, "reporting_period_end": submission_attributes.reporting_period_end, } field_map = {} save_manager.append( load_data_into_model( appropriation_balances, row, field_map=field_map, value_map=value_map, save=False, reverse=reverse ) ) save_manager.save_stragglers() for key in skipped_tas: logger.info(f"Skipped {skipped_tas[key]['count']:,} rows due to missing TAS: {key}") total_tas_skipped = 0 for key in skipped_tas: total_tas_skipped += skipped_tas[key]["count"] logger.info(f"Skipped a total of {total_tas_skipped:,} TAS rows for File A")
def _save_file_c_rows(certified_award_financial, total_rows, start_time, skipped_tas, submission_attributes, reverse): save_manager = BulkCreateManager(FinancialAccountsByAwards) for index, row in enumerate(certified_award_financial, 1): if not (index % 1000): logger.info(f"C File Load: Loading row {index:,} of {total_rows:,} ({datetime.now() - start_time})") upper_case_dict_values(row) # Check and see if there is an entry for this TAS treasury_account, tas_rendering_label = get_treasury_appropriation_account_tas_lookup(row.get("tas_id")) if treasury_account is None: update_skipped_tas(row, tas_rendering_label, skipped_tas) continue # Find a matching transaction record, so we can use its subtier agency information to match to (or create) an # Award record. # Find the award that this award transaction belongs to. If it doesn't exist, create it. filters = {} if row.get("piid"): filters["piid"] = row.get("piid") filters["parent_piid"] = row.get("parent_award_id") else: if row.get("fain") and not row.get("uri"): filters["fain"] = row.get("fain") elif row.get("uri") and not row.get("fain"): filters["uri"] = row.get("uri") else: filters["fain"] = row.get("fain") filters["uri"] = row.get("uri") award_financial_data = FinancialAccountsByAwards() value_map_faba = { "submission": submission_attributes, "reporting_period_start": submission_attributes.reporting_period_start, "reporting_period_end": submission_attributes.reporting_period_end, "treasury_account": treasury_account, "object_class": row.get("object_class"), "program_activity": row.get("program_activity"), "disaster_emergency_fund": get_disaster_emergency_fund(row), } save_manager.append( load_data_into_model(award_financial_data, row, value_map=value_map_faba, save=False, reverse=reverse) ) save_manager.save_stragglers()
def load_file_a(submission_attributes, appropriation_data, db_cursor): """ Process and load file A broker data (aka TAS balances, aka appropriation account balances). """ reverse = re.compile("gross_outlay_amount_by_tas_cpe") skipped_tas = defaultdict(int) # tracks count of rows skipped due to "missing" TAS bulk_treasury_appropriation_account_tas_lookup(appropriation_data, db_cursor) # Create account objects save_manager = BulkCreateManager(AppropriationAccountBalances) for row in appropriation_data: # Check and see if there is an entry for this TAS treasury_account, tas_rendering_label = get_treasury_appropriation_account_tas_lookup(row.get("tas_id")) if treasury_account is None: skipped_tas[tas_rendering_label] += 1 continue # Now that we have the account, we can load the appropriation balances # TODO: Figure out how we want to determine what row is overridden by what row # If we want to correlate, the following attributes are available in the data broker data that might be useful: # appropriation_id, row_number appropriation_balances = something something get appropriation balances... appropriation_balances = AppropriationAccountBalances() value_map = { "treasury_account_identifier": treasury_account, "submission": submission_attributes, "reporting_period_start": submission_attributes.reporting_period_start, "reporting_period_end": submission_attributes.reporting_period_end, } field_map = {} save_manager.append( load_data_into_model( appropriation_balances, row, field_map=field_map, value_map=value_map, save=False, reverse=reverse ) ) save_manager.save_stragglers() for tas, count in skipped_tas.items(): logger.info(f"Skipped {count:,} rows due to {tas}") total_tas_skipped = sum([count for count in skipped_tas.values()]) if total_tas_skipped > 0: logger.info(f"SKIPPED {total_tas_skipped:,} ROWS of File A (missing TAS)") else: logger.info("All File A records in Broker loaded into USAspending")
def _save_file_c_rows(certified_award_financial, total_rows, start_time, skipped_tas, submission_attributes, reverse): save_manager = BulkCreateManager(FinancialAccountsByAwards) for index, row in enumerate(certified_award_financial, 1): if not (index % 1000): logger.info( f"C File Load: Loading row {index:,} of {total_rows:,} ({datetime.now() - start_time})" ) upper_case_dict_values(row) # Check and see if there is an entry for this TAS treasury_account, tas_rendering_label = get_treasury_appropriation_account_tas_lookup( row.get("tas_id")) if treasury_account is None: update_skipped_tas(row, tas_rendering_label, skipped_tas) continue award_financial_data = FinancialAccountsByAwards() value_map_faba = { "submission": submission_attributes, "reporting_period_start": submission_attributes.reporting_period_start, "reporting_period_end": submission_attributes.reporting_period_end, "treasury_account": treasury_account, "object_class": row.get("object_class"), "program_activity": row.get("program_activity"), "disaster_emergency_fund": get_disaster_emergency_fund(row), "distinct_award_key": create_distinct_award_key(row), } save_manager.append( load_data_into_model(award_financial_data, row, value_map=value_map_faba, save=False, reverse=reverse)) save_manager.save_stragglers()
def load_file_b(submission_attributes, prg_act_obj_cls_data, db_cursor): """ Process and load file B broker data (aka TAS balances by program activity and object class). """ reverse = re.compile(r"(_(cpe|fyb)$)|^transaction_obligated_amount$") # dictionary to capture TAS that were skipped and some metadata # tas = top-level key # count = number of rows skipped # rows = row numbers skipped, corresponding to the original row numbers in the file that was submitted skipped_tas = {} bulk_treasury_appropriation_account_tas_lookup(prg_act_obj_cls_data, db_cursor) save_manager = BulkCreateManager( FinancialAccountsByProgramActivityObjectClass) for row in prg_act_obj_cls_data: # Check and see if there is an entry for this TAS treasury_account, tas_rendering_label = get_treasury_appropriation_account_tas_lookup( row.get("tas_id")) if treasury_account is None: update_skipped_tas(row, tas_rendering_label, skipped_tas) continue # get the corresponding account balances row (aka "File A" record) account_balances = AppropriationAccountBalances.objects.get( treasury_account_identifier=treasury_account, submission_id=submission_attributes.submission_id) financial_by_prg_act_obj_cls = FinancialAccountsByProgramActivityObjectClass( ) value_map = { "submission": submission_attributes, "reporting_period_start": submission_attributes.reporting_period_start, "reporting_period_end": submission_attributes.reporting_period_end, "treasury_account": treasury_account, "appropriation_account_balances": account_balances, "object_class": get_object_class(row["object_class"], row["by_direct_reimbursable_fun"]), "program_activity": get_program_activity(row, submission_attributes), "disaster_emergency_fund": get_disaster_emergency_fund(row), } save_manager.append( load_data_into_model(financial_by_prg_act_obj_cls, row, value_map=value_map, save=False, reverse=reverse)) save_manager.save_stragglers() for key in skipped_tas: logger.info( f"Skipped {skipped_tas[key]['count']:,} rows due to missing TAS: {key}" ) total_tas_skipped = 0 for key in skipped_tas: total_tas_skipped += skipped_tas[key]["count"] logger.info( f"Skipped a total of {total_tas_skipped:,} TAS rows for File B")
def load_file_b(submission_attributes, prg_act_obj_cls_data, db_cursor): """ Process and load file B broker data (aka TAS balances by program activity and object class). """ reverse = re.compile(r"(_(cpe|fyb)$)|^transaction_obligated_amount$") skipped_tas = defaultdict( int) # tracks count of rows skipped due to "missing" TAS bulk_treasury_appropriation_account_tas_lookup(prg_act_obj_cls_data, db_cursor) save_manager = BulkCreateManager( FinancialAccountsByProgramActivityObjectClass) for row in prg_act_obj_cls_data: # Check and see if there is an entry for this TAS treasury_account, tas_rendering_label = get_treasury_appropriation_account_tas_lookup( row.get("tas_id")) if treasury_account is None: skipped_tas[tas_rendering_label] += 1 continue # get the corresponding account balances row (aka "File A" record) account_balances = AppropriationAccountBalances.objects.get( treasury_account_identifier=treasury_account, submission_id=submission_attributes.submission_id) financial_by_prg_act_obj_cls = FinancialAccountsByProgramActivityObjectClass( ) value_map = { "submission": submission_attributes, "reporting_period_start": submission_attributes.reporting_period_start, "reporting_period_end": submission_attributes.reporting_period_end, "treasury_account": treasury_account, "appropriation_account_balances": account_balances, "object_class": get_object_class(row["object_class"], row["by_direct_reimbursable_fun"]), "program_activity": get_program_activity(row, submission_attributes), "disaster_emergency_fund": get_disaster_emergency_fund(row), } save_manager.append( load_data_into_model(financial_by_prg_act_obj_cls, row, value_map=value_map, save=False, reverse=reverse)) save_manager.save_stragglers() for tas, count in skipped_tas.items(): logger.info(f"Skipped {count:,} rows due to {tas}") total_tas_skipped = sum([count for count in skipped_tas.values()]) if total_tas_skipped > 0: logger.info( f"SKIPPED {total_tas_skipped:,} ROWS of File B (missing TAS)") else: logger.info("All File B records in Broker loaded into USAspending")