def load_transaction_normalized(self, fabs_broker_data, total_rows):
    """Build TransactionNormalized rows from broker FABS data and bulk-insert them.

    Relies on the module-level lookup lists (award_lookup, awarding_agency_list,
    funding_agency_list, legal_entity_lookup, pop_bulk) being index-aligned with
    fabs_broker_data; appends each built row to transaction_normalized_bulk
    before a single bulk_create at the end.
    """
    start_time = datetime.now()
    for row_number, broker_row in enumerate(fabs_broker_data, 1):
        # Progress log every 10,000 rows.
        if row_number % 10000 == 0:
            logger.info(
                'Transaction Normalized: Loading row {} of {} ({})'.format(
                    str(row_number), str(total_rows),
                    datetime.now() - start_time))

        list_idx = row_number - 1
        value_map = {
            "award": award_lookup[list_idx],
            "awarding_agency": awarding_agency_list[list_idx],
            "funding_agency": funding_agency_list[list_idx],
            "recipient": legal_entity_lookup[list_idx],
            "place_of_performance": pop_bulk[list_idx],
            "period_of_performance_start_date": format_date(broker_row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(broker_row['period_of_performance_curr']),
            "action_date": format_date(broker_row['action_date']),
            "last_modified_date": broker_row['modified_at'],
        }
        field_map = {
            "type": "assistance_type",
            "description": "award_description",
        }

        txn = load_data_into_model(
            TransactionNormalized(),
            broker_row,
            field_map=field_map,
            value_map=value_map,
            as_dict=False,
            save=False)
        txn.fiscal_year = fy(txn.action_date)
        transaction_normalized_bulk.append(txn)

    logger.info(
        'Bulk creating Transaction Normalized (batch_size: {})...'.format(
            BATCH_SIZE))
    TransactionNormalized.objects.bulk_create(transaction_normalized_bulk,
                                              batch_size=BATCH_SIZE)
def totals(self):
    """Aggregate this account's balances by fiscal year.

    Returns a dict with an 'outgoing' section holding per-fiscal-year
    defaultdicts of outlays, obligations, and budget authority; the
    'incoming' section is left empty.
    """
    outlays = defaultdict(Decimal)
    obligations = defaultdict(Decimal)
    budget_authority = defaultdict(Decimal)

    for balance in self.account_balances.all():
        year = fy(balance.reporting_period_start)
        budget_authority[year] += balance.budget_authority_appropriated_amount_cpe
        outlays[year] += balance.gross_outlay_amount_by_tas_cpe
        obligations[year] += balance.obligations_incurred_total_by_tas_cpe

    return {
        'outgoing': {
            'outlays': outlays,
            'obligations': obligations,
            'budget_authority': budget_authority,
        },
        'incoming': {},
    }
def load_quarterly_spreadsheets(self, quarter, results, overall_totals):
    """Special procedure for getting quarterly update .xls files

    These are downloaded from MAX Information and Reports (Executive,
    Legislative, and Judicial Users)

    https://max.omb.gov/maxportal/document/SF133/Budget/FY%202017%20-%20SF%20133%20Reports%20on%20Budget%20Execution%20and%20Budgetary%20Resources.html
    """
    spreadsheet_glob = os.path.join(self.directory, 'quarterly', '*.xls')
    current_fy = fy(date.today())
    overall_totals[current_fy] = 0
    amount_column = 'Q{}_AMT'.format(quarter)

    for workbook_path in glob.glob(spreadsheet_glob):
        sheet = open_workbook(workbook_path).sheets()[0]
        header_row = [cell.value for cell in sheet.row(0)]
        for row_idx in range(1, sheet.nrows):
            values = (cell.value for cell in sheet.row(row_idx))
            record = dict(zip(header_row, values))
            # Only line number 2500 carries the total we want.
            if record['LNO'] != '2500':
                continue
            # Spreadsheet amounts are in thousands of dollars.
            dollars = int(record[amount_column]) * 1000
            results[(record['TRAG'], None, current_fy)] = dollars
            overall_totals[current_fy] += dollars
def totals_program_activity(self):
    """Per-program-activity obligation and outlay totals, keyed by fiscal year.

    Returns a list with one dict per program activity, each holding the
    activity's id/name/code plus defaultdicts mapping fiscal year -> Decimal.
    """
    summaries = []
    for activity in self.program_activities:
        yearly_obligations = defaultdict(Decimal)
        yearly_outlays = defaultdict(Decimal)

        for balance in self.program_balances.filter(program_activity=activity):
            # TODO: once it is present, use the reporting_fiscal_year directly
            year = fy(balance.submission.reporting_period_start)
            yearly_obligations[year] += balance.obligations_incurred_by_program_object_class_cpe
            yearly_outlays[year] += balance.gross_outlay_amount_by_program_object_class_cpe

        summaries.append({
            'id': activity.id,
            'program_activity_name': activity.program_activity_name,
            'program_activity_code': activity.program_activity_code,
            'obligations': yearly_obligations,
            'outlays': yearly_outlays,
        })
    return summaries
def totals_object_class(self):
    """Per-object-class obligation and outlay totals, keyed by fiscal year.

    Returns a list with one dict per object class, each holding the object
    class code plus defaultdicts mapping fiscal year -> Decimal totals.
    """
    results = []
    for object_class in self.object_classes:
        obligations = defaultdict(Decimal)
        outlays = defaultdict(Decimal)

        for pb in self.program_balances.filter(object_class=object_class):
            reporting_fiscal_year = fy(
                pb.submission.reporting_period_start)
            obligations[
                reporting_fiscal_year] += pb.obligations_incurred_by_program_object_class_cpe
            outlays[
                reporting_fiscal_year] += pb.gross_outlay_amount_by_program_object_class_cpe

        result = {
            'major_object_class_code': None,
            'major_object_class_name': None,  # TODO: enable once ObjectClass populated
            'object_class': object_class.object_class,  # TODO: remove
            # Bug fix: these two keys were previously swapped ('outlays' held
            # the obligations total and vice versa); compare the correct
            # mapping in totals_program_activity.
            'obligations': obligations,
            'outlays': outlays,
        }
        results.append(result)
    return results
def insert_new_fpds(self, to_insert, total_rows):
    """Insert new FPDS (procurement) records pulled from the Broker.

    For each broker row this creates the recipient location/LegalEntity and
    place-of-performance Location, resolves awarding/funding agencies from
    subtier codes, gets-or-creates the summary Award, and then either updates
    or creates the TransactionNormalized + TransactionFPDS pair keyed on
    detached_award_proc_unique.

    Args:
        to_insert: iterable of broker row dicts to load.
        total_rows: total row count, used only for progress logging.
    """
    logger.info('Starting insertion of new FPDS data')

    # USAspending Location field name -> broker column name, for the
    # place of performance.
    place_of_performance_field_map = {
        "location_country_code": "place_of_perform_country_c",
        "country_name": "place_of_perf_country_desc",
        "state_code": "place_of_performance_state",
        "state_name": "place_of_perfor_state_desc",
        "city_name": "place_of_perform_city_name",
        "county_name": "place_of_perform_county_na",
        "county_code": "place_of_perform_county_co",
        "zip_4a": "place_of_performance_zip4a",
        "congressional_code": "place_of_performance_congr",
        "zip_last4": "place_of_perform_zip_last4",
        "zip5": "place_of_performance_zip5"
    }

    # USAspending Location field name -> broker column name, for the
    # recipient (legal entity) location.
    legal_entity_location_field_map = {
        "location_country_code": "legal_entity_country_code",
        "country_name": "legal_entity_country_name",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_descrip",
        "city_name": "legal_entity_city_name",
        "county_name": "legal_entity_county_name",
        "county_code": "legal_entity_county_code",
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "zip4": "legal_entity_zip4",
        "congressional_code": "legal_entity_congressional",
        "zip_last4": "legal_entity_zip_last4",
        "zip5": "legal_entity_zip5"
    }

    start_time = datetime.now()
    for index, row in enumerate(to_insert, 1):
        # Progress log every 1000 rows.
        if not (index % 1000):
            logger.info(
                'Inserting Stale FPDS: Inserting row {} of {} ({})'.format(
                    str(index), str(total_rows),
                    datetime.now() - start_time))

        # Broker string values are upper-cased in place before loading.
        for key in row:
            if isinstance(row[key], str):
                row[key] = row[key].upper()

        # Create new LegalEntityLocation and LegalEntity from the row data
        legal_entity_location = create_location(
            legal_entity_location_field_map, row, {
                "recipient_flag": True,
                "is_fpds": True
            })
        recipient_name = row['awardee_or_recipient_legal']
        # NOTE(review): this always creates a fresh LegalEntity — no
        # de-duplication against existing entities happens here.
        legal_entity = LegalEntity.objects.create(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name if recipient_name is not None else "")
        legal_entity_value_map = {
            "location": legal_entity_location,
            "business_categories": get_business_categories(row=row, data_type='fpds'),
            "is_fpds": True
        }
        set_legal_entity_boolean_fields(row)
        legal_entity = load_data_into_model(
            legal_entity, row, value_map=legal_entity_value_map, save=True)

        # Create the place of performance location
        pop_location = create_location(
            place_of_performance_field_map, row,
            {"place_of_performance_flag": True})

        # Find the toptier awards from the subtier awards
        awarding_agency = Agency.get_by_subtier_only(
            row["awarding_sub_tier_agency_c"])
        funding_agency = Agency.get_by_subtier_only(
            row["funding_sub_tier_agency_co"])

        # Generate the unique Award ID
        # "CONT_AW_" + agency_id + referenced_idv_agency_iden + piid + parent_award_id
        generated_unique_id = 'CONT_AW_' + (row['agency_id'] if row['agency_id'] else '-NONE-') + '_' + \
            (row['referenced_idv_agency_iden'] if row['referenced_idv_agency_iden'] else '-NONE-') + '_' + \
            (row['piid'] if row['piid'] else '-NONE-') + '_' + \
            (row['parent_award_id'] if row['parent_award_id'] else '-NONE-')

        # Create the summary Award
        (created, award) = Award.get_or_create_summary_award(
            generated_unique_award_id=generated_unique_id,
            piid=row['piid'])
        award.parent_award_piid = row.get('parent_award_id')
        award.save()

        # Append row to list of Awards updated
        award_update_id_list.append(award.id)

        # Broker timestamps may or may not carry fractional seconds.
        try:
            last_mod_date = datetime.strptime(str(
                row['last_modified']), "%Y-%m-%d %H:%M:%S.%f").date()
        except ValueError:
            last_mod_date = datetime.strptime(str(row['last_modified']),
                                              "%Y-%m-%d %H:%M:%S").date()

        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": funding_agency,
            "recipient": legal_entity,
            "place_of_performance": pop_location,
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
            "action_date": format_date(row['action_date']),
            "last_modified_date": last_mod_date,
            "transaction_unique_id": row['detached_award_proc_unique'],
            "generated_unique_award_id": generated_unique_id,
            "is_fpds": True
        }

        # TransactionNormalized field -> broker column.
        contract_field_map = {
            "type": "contract_award_type",
            "type_description": "contract_award_type_desc",
            "description": "award_description"
        }

        transaction_normalized_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=contract_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        contract_instance = load_data_into_model(
            TransactionFPDS(),  # thrown away
            row,
            as_dict=True)

        detached_award_proc_unique = contract_instance[
            'detached_award_proc_unique']
        unique_fpds = TransactionFPDS.objects.filter(
            detached_award_proc_unique=detached_award_proc_unique)

        if unique_fpds.first():
            # Transaction already exists: refresh both rows in place.
            transaction_normalized_dict["update_date"] = datetime.utcnow()
            transaction_normalized_dict["fiscal_year"] = fy(
                transaction_normalized_dict["action_date"])

            # update TransactionNormalized
            TransactionNormalized.objects.filter(id=unique_fpds.first().transaction.id).\
                update(**transaction_normalized_dict)

            # update TransactionFPDS
            unique_fpds.update(**contract_instance)
        else:
            # create TransactionNormalized
            transaction = TransactionNormalized(
                **transaction_normalized_dict)
            transaction.save()

            # create TransactionFPDS
            transaction_fpds = TransactionFPDS(transaction=transaction,
                                               **contract_instance)
            transaction_fpds.save()
def get_date_signed__fy(self, obj):
    """Fiscal year of the object's date_signed."""
    date_signed = obj.date_signed
    return fy(date_signed)
def update_transaction_assistance(db_cursor, fiscal_year=None, page=1, limit=500000):
    """Pull a page of published FABS rows from the Broker DB and bulk-load them.

    Builds, in index-aligned bulk passes over the fetched rows: recipient
    Locations, place-of-performance Locations, LegalEntities, Awards,
    TransactionNormalized rows, and finally TransactionFABS rows.

    Args:
        db_cursor: open cursor on the Broker database.
        fiscal_year: if given, restrict rows to action dates within that
            fiscal year; if None, load all rows (paged).
        page: 1-based page number used to compute the OFFSET.
        limit: page size (LIMIT).
    """
    query = "SELECT * FROM published_award_financial_assistance"
    arguments = []

    if fiscal_year:
        # Bug fix: these bounds were previously computed before this check,
        # raising TypeError (None - 1) despite fiscal_year defaulting to None.
        fy_begin = '10/01/' + str(fiscal_year - 1)
        fy_end = '09/30/' + str(fiscal_year)
        query += " WHERE" + ' action_date::Date BETWEEN %s AND %s'
        arguments += [fy_begin]
        arguments += [fy_end]

    query += ' ORDER BY published_award_financial_assistance_id LIMIT %s OFFSET %s'
    arguments += [limit, (page - 1) * limit]

    # Bug fix: the log line previously hard-indexed arguments[0..3], which
    # raised IndexError whenever no fiscal-year filter was applied.
    logger.info("Executing query on Broker DB => " + query % tuple(arguments))

    db_cursor.execute(query, arguments)

    logger.info("Running dictfetchall on db_cursor")
    award_financial_assistance_data = dictfetchall(db_cursor)

    # USAspending Location field -> broker column, recipient location.
    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "county_code": "legal_entity_county_code",
        "county_name": "legal_entity_county_name",
        "foreign_city_name": "legal_entity_foreign_city",
        "foreign_postal_code": "legal_entity_foreign_posta",
        "foreign_province": "legal_entity_foreign_provi",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_name",
        "zip5": "legal_entity_zip5",
        "zip_last4": "legal_entity_zip_last4",
        "location_country_code": "legal_entity_country_code"
    }

    # USAspending Location field -> broker column, place of performance.
    place_of_performance_field_map = {
        "city_name": "place_of_performance_city",
        "performance_code": "place_of_performance_code",
        "congressional_code": "place_of_performance_congr",
        "county_name": "place_of_perform_county_na",
        "foreign_location_description": "place_of_performance_forei",
        "state_name": "place_of_perform_state_nam",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"
    }

    # TransactionNormalized field -> broker column.
    fad_field_map = {
        "type": "assistance_type",
        "description": "award_description",
    }

    logger.info("Getting total rows")
    total_rows = len(award_financial_assistance_data)

    logger.info("Processing " + str(total_rows) + " rows of assistance data")

    # The bulk lists below stay index-aligned with
    # award_financial_assistance_data: each pass appends exactly one element
    # per broker row, and later passes index into earlier lists.
    lel_bulk = []
    pop_bulk = []
    legal_entity_bulk = []
    award_bulk = []
    transaction_assistance_bulk = []
    transaction_normalized_bulk = []

    logger.info('Getting legal entity location objects for {} rows...'.format(len(award_financial_assistance_data)))
    for row in award_financial_assistance_data:
        # Recipient flag is true for LeL
        legal_entity_location = get_or_create_location(
            legal_entity_location_field_map, row, {"recipient_flag": True}, save=False
        )
        lel_bulk.append(legal_entity_location)

    logger.info('Bulk creating {} legal entity location rows...'.format(len(lel_bulk)))
    try:
        Location.objects.bulk_create(lel_bulk)
    except IntegrityError:
        logger.info('!!! DUPLICATES FOUND. Continuing... ')

    logger.info('Getting place of performance objects for {} rows...'.format(len(award_financial_assistance_data)))
    for row in award_financial_assistance_data:
        # Place of Performance flag is true for PoP
        pop_location = get_or_create_location(
            place_of_performance_field_map, row, {"place_of_performance_flag": True}, save=False
        )
        pop_bulk.append(pop_location)

    logger.info('Bulk creating {} place of performance rows...'.format(len(pop_bulk)))
    try:
        Location.objects.bulk_create(pop_bulk)
    except IntegrityError:
        logger.info('!!! DUPLICATES FOUND. Continuing... ')

    logger.info('Getting legal entity objects for {} rows...'.format(len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):
        recipient_name = row.get('awardee_or_recipient_legal', '')

        # Reuse an existing LegalEntity when (DUNS, name) matches; otherwise
        # build a new one pointing at the location created in the first pass.
        legal_entity = LegalEntity.objects.filter(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name).first()

        if legal_entity is None:
            legal_entity = LegalEntity(
                recipient_unique_id=row['awardee_or_recipient_uniqu'],
                recipient_name=recipient_name)
            legal_entity_value_map = {
                "location": lel_bulk[index - 1],
            }
            legal_entity = load_data_into_model(
                legal_entity, row, value_map=legal_entity_value_map, save=False)

        legal_entity_bulk.append(legal_entity)

    logger.info('Bulk creating {} legal entity rows...'.format(len(legal_entity_bulk)))
    try:
        LegalEntity.objects.bulk_create(legal_entity_bulk)
    except IntegrityError:
        logger.info('!!! DUPLICATES FOUND. Continuing... ')

    awarding_agency_list = []
    funding_agency_list = []

    logger.info('Getting award objects for {} rows...'.format(len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):
        # If awarding toptier agency code (aka CGAC) is not supplied on the D2
        # record, use the sub tier code to look it up. This code assumes that
        # all incoming records will supply an awarding subtier agency code.
        if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1:
            awarding_subtier_agency_id = subtier_agency_map[row["awarding_sub_tier_agency_c"]]
            awarding_toptier_agency_id = subtier_to_agency_map[awarding_subtier_agency_id]['toptier_agency_id']
            awarding_cgac_code = toptier_agency_map[awarding_toptier_agency_id]
            row['awarding_agency_code'] = awarding_cgac_code

        # If funding toptier agency code (aka CGAC) is empty, try using the sub
        # tier funding code to look it up. Unlike the awarding agency, we can't
        # assume that the funding agency subtier code will always be present.
        if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1:
            funding_subtier_agency_id = subtier_agency_map.get(row["funding_sub_tier_agency_co"])
            if funding_subtier_agency_id is not None:
                funding_toptier_agency_id = \
                    subtier_to_agency_map[funding_subtier_agency_id]['toptier_agency_id']
                funding_cgac_code = toptier_agency_map[funding_toptier_agency_id]
            else:
                funding_cgac_code = None
            row['funding_agency_code'] = funding_cgac_code

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'],
            row["awarding_sub_tier_agency_c"]
        )
        funding_agency = Agency.get_by_toptier_subtier(
            row['funding_agency_code'],
            row["funding_sub_tier_agency_co"]
        )
        awarding_agency_list.append(awarding_agency)
        funding_agency_list.append(funding_agency)

        # award.save() is called in Award.get_or_create_summary_award by default
        created, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            fain=row.get('fain'),
            uri=row.get('uri'),
            save=False
        )
        award_bulk.append(award)
        award_update_id_list.append(award.id)

    logger.info('Bulk creating {} award rows...'.format(len(award_bulk)))
    try:
        Award.objects.bulk_create(award_bulk)
    except IntegrityError:
        logger.info('!!! DUPLICATES FOUND. Continuing... ')

    logger.info('Getting transaction_normalized for {} rows...'.format(len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):
        parent_txn_value_map = {
            "award": award_bulk[index - 1],
            "awarding_agency": awarding_agency_list[index - 1],
            "funding_agency": funding_agency_list[index - 1],
            "recipient": legal_entity_bulk[index - 1],
            "place_of_performance": pop_bulk[index - 1],
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
            "action_date": format_date(row['action_date']),
        }

        transaction_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=fad_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        transaction_normalized = TransactionNormalized.get_or_create_transaction(**transaction_dict)
        transaction_normalized.fiscal_year = fy(transaction_normalized.action_date)
        transaction_normalized_bulk.append(transaction_normalized)

    logger.info('Bulk creating {} TransactionNormalized rows...'.format(len(transaction_normalized_bulk)))
    try:
        TransactionNormalized.objects.bulk_create(transaction_normalized_bulk)
    except IntegrityError:
        logger.info('Tried and failed to insert duplicate transaction_normalized row. Continuing... ')

    for index, row in enumerate(award_financial_assistance_data, 1):
        financial_assistance_data = load_data_into_model(
            TransactionFABS(),  # thrown away
            row,
            as_dict=True)

        transaction_assistance = TransactionFABS(
            transaction=transaction_normalized_bulk[index - 1],
            **financial_assistance_data)
        transaction_assistance_bulk.append(transaction_assistance)

    logger.info('Bulk creating TransactionFABS rows...')
    try:
        TransactionFABS.objects.bulk_create(transaction_assistance_bulk)
    except IntegrityError:
        logger.info('!!! DUPLICATES FOUND. Continuing... ')
def test_fy_none():
    """fy() passes None through unchanged."""
    result = fy(None)
    assert result is None
def test_fy_type_exceptions(not_date):
    """fy() raises TypeError for values that are not dates."""
    with pytest.raises(TypeError):
        fy(not_date)
def test_fy_returns_correct(raw_date, expected_fy):
    """fy() maps each parametrized date to its expected fiscal year."""
    actual = fy(raw_date)
    assert actual == expected_fy
def test_fy_returns_integer(raw_date, expected_fy):
    """fy() always returns an int for date input."""
    result = fy(raw_date)
    assert isinstance(result, int)
def handle(self, *args, **options):
    """Load a USAspending financial assistance CSV and bulk-insert transactions.

    Reads the file named in options['file'][0], creates a SubmissionAttributes
    stamp for this load, builds Transaction and TransactionAssistance rows in
    parallel lists, bulk-inserts both, then refreshes award aggregates and
    description fields.
    """
    h.clear_caches()
    csv_file = options['file'][0]
    self.logger.info("Starting load for file {}".format(csv_file))

    # Create the csv reader
    reader = CsvDataReader(csv_file)

    # Create a new submission attributes object for this timestamp
    subattr = SubmissionAttributes()
    subattr.usaspending_update = datetime.now()
    subattr.save()

    # Create lists to hold model instances for bulk insert
    txn_list = []
    txn_assistance_list = []

    for idx, row in enumerate(reader):
        # NOTE(review): this progress check uses len(reader), not idx — unless
        # CsvDataReader.__len__ grows as rows are consumed, this logs either
        # every row or never. Verify against CsvDataReader; `idx % 1000` is
        # probably what was intended.
        if len(reader) % 1000 == 0:
            self.logger.info("Read row {}".format(len(reader)))
        row = h.cleanse_values(row)

        awarding_agency = self.get_awarding_agency(row)  # todo: use agency dict?

        # Create the transaction object for this row
        txn_dict = {
            "submission": subattr,
            "action_date": h.convert_date(row['obligation_action_date']),
            "action_type": h.up2colon(row['action_type']),
            "award": self.get_or_create_award(row, awarding_agency=awarding_agency),
            "awarding_agency": awarding_agency,
            "description": row["project_description"],  # ?? account_title is another contender?
            "data_source": "USA",
            "federal_action_obligation": row["fed_funding_amount"],
            "last_modified_date": h.convert_date(row['last_modified_date']),
            "modification_number": row["federal_award_mod"],  # ??
            "period_of_performance_start_date": h.convert_date(row['starting_date']),
            "period_of_performance_current_end_date": h.convert_date(row['ending_date']),
            "place_of_performance": h.get_or_create_location(
                row, location_mapper_fin_assistance_principal_place),
            "recipient": self.get_or_create_recipient(row),
            "type": h.up2colon(row['assistance_type']),
            "usaspending_unique_transaction_id": row["unique_transaction_id"],
            # ??"funding_agency_id":
            # ?? "certified date":
        }
        txn = Transaction(**txn_dict)
        txn.fiscal_year = fy(txn.action_date)
        txn_list.append(txn)

        # Create the transaction contract object for this row
        txn_assistance_dict = {
            "submission": subattr,
            "fain": row["federal_award_id"],
            "uri": row["uri"],
            "cfda": Cfda.objects.filter(
                program_number=row["cfda_program_num"]).first(),
            "correction_late_delete_indicator": h.up2colon(row['correction_late_ind']),
            "face_value_loan_guarantee": row["face_loan_guran"],
            "fiscal_year_and_quarter_correction": row["fyq_correction"],
            "non_federal_funding_amount": row["non_fed_funding_amount"],
            "original_loan_subsidy_cost": row["orig_sub_guran"],  # ??
            "record_type": int(h.up2colon(row['record_type'])),
            "sai_number": row["sai_number"],
            "submitted_type": "C",  # ?? For CSV?
        }
        # ?? business_funds_indicator
        # ?? reporting period start/end??
        txn_assistance = TransactionAssistance(**txn_assistance_dict)
        txn_assistance_list.append(txn_assistance)

    # Bulk insert transaction rows
    self.logger.info(
        "Starting Transaction bulk insert ({} records)".format(
            len(txn_list)))
    Transaction.objects.bulk_create(txn_list)
    self.logger.info("Completed Transaction bulk insert")

    # Update txn assistance list with newly-inserted transactions
    award_id_list = []  # we'll need this when updating the awards later on
    for idx, t in enumerate(txn_assistance_list):
        t.transaction = txn_list[idx]
        award_id_list.append(txn_list[idx].award_id)

    # Bulk insert transaction assistance rows
    self.logger.info(
        "Starting TransactionAssistance bulk insert ({} records)".format(
            len(txn_assistance_list)))
    TransactionAssistance.objects.bulk_create(txn_assistance_list)
    self.logger.info("Completed TransactionAssistance bulk insert")

    # Update awards to reflect latest transaction information
    # (note that this can't be done via signals or a save()
    # override in the model itself, because those aren't
    # triggered by a bulk update
    self.logger.info("Starting Awards update")
    count = update_awards(tuple(award_id_list))
    update_contract_awards(tuple(award_id_list))
    update_model_description_fields()
    self.logger.info("Completed Awards update ({} records)".format(count))
def insert_new_fabs(self, to_insert, total_rows):
    """Insert new FABS (financial assistance) records pulled from the Broker.

    For each broker row this creates the recipient location/LegalEntity and
    place-of-performance Location, resolves awarding/funding agencies from
    subtier codes, derives the unique award key from record type + fain/uri,
    gets-or-creates the summary Award, and then either updates or creates the
    TransactionNormalized + TransactionFABS pair keyed on afa_generated_unique.

    Args:
        to_insert: iterable of broker row dicts to load.
        total_rows: total row count, used only for progress logging.
    """
    logger.info('Starting insertion of new FABS data')

    # USAspending Location field name -> broker column name, for the
    # place of performance.
    place_of_performance_field_map = {
        "location_country_code": "place_of_perform_country_c",
        "country_name": "place_of_perform_country_n",
        "state_code": "place_of_perfor_state_code",
        "state_name": "place_of_perform_state_nam",
        "city_name": "place_of_performance_city",
        "county_name": "place_of_perform_county_na",
        "county_code": "place_of_perform_county_co",
        "foreign_location_description": "place_of_performance_forei",
        "zip_4a": "place_of_performance_zip4a",
        "congressional_code": "place_of_performance_congr",
        "performance_code": "place_of_performance_code",
        "zip_last4": "place_of_perform_zip_last4",
        "zip5": "place_of_performance_zip5"
    }

    # USAspending Location field name -> broker column name, for the
    # recipient (legal entity) location.
    legal_entity_location_field_map = {
        "location_country_code": "legal_entity_country_code",
        "country_name": "legal_entity_country_name",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_name",
        "city_name": "legal_entity_city_name",
        "city_code": "legal_entity_city_code",
        "county_name": "legal_entity_county_name",
        "county_code": "legal_entity_county_code",
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "foreign_location_description": "legal_entity_foreign_descr",
        "congressional_code": "legal_entity_congressional",
        "zip_last4": "legal_entity_zip_last4",
        "zip5": "legal_entity_zip5",
        "foreign_postal_code": "legal_entity_foreign_posta",
        "foreign_province": "legal_entity_foreign_provi",
        "foreign_city_name": "legal_entity_foreign_city"
    }

    start_time = datetime.now()
    for index, row in enumerate(to_insert, 1):
        # Progress log every 1000 rows.
        if not (index % 1000):
            logger.info('Inserting Stale FABS: Inserting row {} of {} ({})'.format(str(index),
                                                                                   str(total_rows),
                                                                                   datetime.now() - start_time))

        # Broker string values are upper-cased in place before loading.
        for key in row:
            if isinstance(row[key], str):
                row[key] = row[key].upper()

        # Create new LegalEntityLocation and LegalEntity from the row data
        legal_entity_location = create_location(legal_entity_location_field_map, row, {"recipient_flag": True})
        recipient_name = row['awardee_or_recipient_legal']
        # NOTE(review): this always creates a fresh LegalEntity — no
        # de-duplication against existing entities happens here.
        legal_entity = LegalEntity.objects.create(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name if recipient_name is not None else ""
        )
        legal_entity_value_map = {
            "location": legal_entity_location,
            "business_categories": get_business_categories(row=row, data_type='fabs'),
            "business_types_description": get_business_type_description(row['business_types'])
        }
        legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True)

        # Create the place of performance location
        pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True})

        # Find the toptier awards from the subtier awards
        awarding_agency = Agency.get_by_subtier_only(row["awarding_sub_tier_agency_c"])
        funding_agency = Agency.get_by_subtier_only(row["funding_sub_tier_agency_co"])

        # Generate the unique Award ID
        # "ASST_AW_" + awarding_sub_tier_agency_c + fain + uri

        # this will raise an exception if the cast to an int fails, that's ok since we don't want to process
        # non-numeric record type values
        record_type_int = int(row['record_type'])
        if record_type_int == 1:
            # Record type 1: key on uri, blank out fain.
            uri = row['uri'] if row['uri'] else '-NONE-'
            fain = '-NONE-'
        elif record_type_int == 2:
            # Record type 2: key on fain, blank out uri.
            uri = '-NONE-'
            fain = row['fain'] if row['fain'] else '-NONE-'
        else:
            raise Exception('Invalid record type encountered for the following afa_generated_unique record: %s'
                            % row['afa_generated_unique'])

        generated_unique_id = 'ASST_AW_' +\
            (row['awarding_sub_tier_agency_c'] if row['awarding_sub_tier_agency_c'] else '-NONE-') + '_' + \
            fain + '_' + uri

        # Create the summary Award
        (created, award) = Award.get_or_create_summary_award(generated_unique_award_id=generated_unique_id,
                                                             fain=row['fain'],
                                                             uri=row['uri'],
                                                             record_type=row['record_type'])
        award.save()

        # Append row to list of Awards updated
        award_update_id_list.append(award.id)

        # Broker timestamps may or may not carry fractional seconds.
        try:
            last_mod_date = datetime.strptime(str(row['modified_at']), "%Y-%m-%d %H:%M:%S.%f").date()
        except ValueError:
            last_mod_date = datetime.strptime(str(row['modified_at']), "%Y-%m-%d %H:%M:%S").date()

        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": funding_agency,
            "recipient": legal_entity,
            "place_of_performance": pop_location,
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
            "action_date": format_date(row['action_date']),
            "last_modified_date": last_mod_date,
            "type_description": get_assistance_type_description(row['assistance_type']),
            "transaction_unique_id": row['afa_generated_unique'],
            "generated_unique_award_id": generated_unique_id
        }

        # TransactionNormalized field -> broker column.
        fad_field_map = {
            "type": "assistance_type",
            "description": "award_description",
        }

        transaction_normalized_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=fad_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        financial_assistance_data = load_data_into_model(
            TransactionFABS(),  # thrown away
            row,
            as_dict=True)

        afa_generated_unique = financial_assistance_data['afa_generated_unique']
        unique_fabs = TransactionFABS.objects.filter(afa_generated_unique=afa_generated_unique)

        if unique_fabs.first():
            # Transaction already exists: refresh both rows in place.
            transaction_normalized_dict["update_date"] = datetime.utcnow()
            transaction_normalized_dict["fiscal_year"] = fy(transaction_normalized_dict["action_date"])

            # Update TransactionNormalized
            TransactionNormalized.objects.filter(id=unique_fabs.first().transaction.id).\
                update(**transaction_normalized_dict)

            # Update TransactionFABS
            unique_fabs.update(**financial_assistance_data)
        else:
            # Create TransactionNormalized
            transaction = TransactionNormalized(**transaction_normalized_dict)
            transaction.save()

            # Create TransactionFABS
            transaction_fabs = TransactionFABS(transaction=transaction, **financial_assistance_data)
            transaction_fabs.save()
def save(self, *args, **kwargs):
    """Derive fiscal_year from action_date, then persist normally."""
    derived_fiscal_year = fy(self.action_date)
    self.fiscal_year = derived_fiscal_year
    super().save(*args, **kwargs)
def handle(self, *args, **options):
    """Load a USASpending contracts CSV into Transaction/TransactionContract.

    Reads the file named in ``options['file'][0]``, creates one
    SubmissionAttributes row stamped with the current time, builds
    Transaction and TransactionContract instances for every CSV row,
    bulk-inserts both sets, and finally refreshes the affected Award
    records' rolled-up data and description fields.
    """
    h.clear_caches()
    csv_file = options['file'][0]
    self.logger.info("Starting load for file {}".format(csv_file))

    # Create the csv reader
    reader = CsvDataReader(csv_file)

    # Create a new submission attributes object for this timestamp
    subattr = SubmissionAttributes()
    subattr.usaspending_update = datetime.now()
    subattr.save()

    # Create lists to hold model instances for bulk insert
    txn_list = []
    txn_contract_list = []

    subtier_agency_dict = h.get_subtier_agency_dict()

    for idx, row in enumerate(reader):
        # BUG FIX: the original condition was `len(reader) % 1000 == 0`,
        # which is constant for the whole file — progress was logged either
        # on every row or never. Key the check off the row index instead.
        if idx % 1000 == 0:
            self.logger.info("Read row {}".format(idx))
        row = h.cleanse_values(row)

        awarding_agency_id = self.get_agency_id(row["contractingofficeagencyid"], subtier_agency_dict)

        # Create the transaction object for this row
        txn_dict = self._build_transaction_dict(row, subattr, awarding_agency_id, subtier_agency_dict)
        txn = Transaction(**txn_dict)
        txn.fiscal_year = fy(txn.action_date)
        txn_list.append(txn)

        # Create the transaction contract object for this row
        txn_contract_dict = self._build_transaction_contract_dict(row, subattr)
        txn_contract = TransactionContract(**txn_contract_dict)
        txn_contract_list.append(txn_contract)

    # Bulk insert transaction rows
    self.logger.info("Starting Transaction bulk insert ({} records)".format(len(txn_list)))
    Transaction.objects.bulk_create(txn_list)
    self.logger.info("Completed Transaction bulk insert")

    # Update txn contract list with newly-inserted transactions
    award_id_list = []  # we'll need this when updating the awards later on
    for idx, t in enumerate(txn_contract_list):
        # add transaction info to this TransactionContract object
        t.transaction = txn_list[idx]
        # add the corresponding award id to a list we'll use when
        # batch-updating award data
        award_id_list.append(txn_list[idx].award_id)

    # Bulk insert transaction contract rows
    self.logger.info("Starting TransactionContract bulk insert ({} records)".format(len(txn_contract_list)))
    TransactionContract.objects.bulk_create(txn_contract_list)
    self.logger.info("Completed TransactionContract bulk insert")

    # Update awards to reflect latest transaction information
    # (note that this can't be done via signals or a save()
    # override in the model itself, because those aren't
    # triggered by a bulk update)
    self.logger.info("Starting Awards update")
    count = update_awards(tuple(award_id_list))
    update_contract_awards(tuple(award_id_list))
    update_model_description_fields()
    self.logger.info("Completed Awards update ({} records)".format(count))

def _build_transaction_dict(self, row, subattr, awarding_agency_id, subtier_agency_dict):
    """Map one cleansed CSV row to Transaction constructor kwargs."""
    return {
        "action_date": h.convert_date(row['signeddate']),
        "award": self.get_or_create_award(row, awarding_agency_id),
        "awarding_agency_id": awarding_agency_id,
        "data_source": "USA",
        "description": row["descriptionofcontractrequirement"],
        "federal_action_obligation": row["dollarsobligated"],
        "funding_agency_id": self.get_agency_id(row["fundingrequestingagencyid"], subtier_agency_dict),
        "last_modified_date": h.convert_date(row['last_modified_date']),
        "modification_number": row["modnumber"],
        "place_of_performance": h.get_or_create_location(
            row, mapper=location_mapper_place_of_performance),
        "period_of_performance_current_end_date": h.convert_date(row['currentcompletiondate']),
        "period_of_performance_start_date": h.convert_date(row['effectivedate']),
        "recipient": self.get_or_create_recipient(row),
        "submission": subattr,
        "type": evaluate_contract_award_type(row),
        "action_type": h.up2colon(row['reasonformodification']),
        "usaspending_unique_transaction_id": row["unique_transaction_id"]
    }

def _build_transaction_contract_dict(self, row, subattr):
    """Map one cleansed CSV row to TransactionContract constructor kwargs
    (everything except ``transaction``, which is attached after bulk insert).
    """
    return {
        "submission": subattr,
        "piid": row['piid'],
        "parent_award_id": row['idvpiid'],
        "current_total_value_award": h.parse_numeric_value(row["baseandexercisedoptionsvalue"]),
        "period_of_performance_potential_end_date": h.convert_date(row['ultimatecompletiondate']),
        "potential_total_value_of_award": h.parse_numeric_value(row["baseandalloptionsvalue"]),
        "epa_designated_product": self.parse_first_character(row['useofepadesignatedproducts']),
        "gfe_gfp": h.up2colon(row['gfe_gfp']),
        "cost_or_pricing_data": h.up2colon(row['costorpricingdata']),
        "type_of_contract_pricing": h.up2colon(row['typeofcontractpricing']),
        "multiple_or_single_award_idv": h.up2colon(row['multipleorsingleawardidc']),
        # NOTE(review): "naics" is sourced from the same CSV column as
        # "national_interest_action" below — looks like a copy-paste slip
        # (expected something like row['naics']); confirm against the CSV
        # schema before changing.
        "naics": h.up2colon(row['nationalinterestactioncode']),
        "dod_claimant_program_code": h.up2colon(row['claimantprogramcode']),
        "commercial_item_acquisition_procedures": h.up2colon(
            row['commercialitemacquisitionprocedures']),
        "commercial_item_test_program": h.up2colon(row['commercialitemtestprogram']),
        "consolidated_contract": h.up2colon(row['consolidatedcontract']),
        "contingency_humanitarian_or_peacekeeping_operation": h.up2colon(
            row['contingencyhumanitarianpeacekeepingoperation']),
        "contract_bundling": h.up2colon(row['contractbundling']),
        "contract_financing": h.up2colon(row['contractfinancing']),
        "contracting_officers_determination_of_business_size": h.up2colon(
            row['contractingofficerbusinesssizedetermination']),
        "country_of_product_or_service_origin": h.up2colon(row['countryoforigin']),
        "davis_bacon_act": h.up2colon(row['davisbaconact']),
        "evaluated_preference": h.up2colon(row['evaluatedpreference']),
        "extent_competed": h.up2colon(row['extentcompeted']),
        "information_technology_commercial_item_category": h.up2colon(
            row['informationtechnologycommercialitemcategory']),
        "interagency_contracting_authority": h.up2colon(row['interagencycontractingauthority']),
        "local_area_set_aside": h.up2colon(row['localareasetaside']),
        "purchase_card_as_payment_method": h.up2colon(row['purchasecardaspaymentmethod']),
        "multi_year_contract": h.up2colon(row['multiyearcontract']),
        "national_interest_action": h.up2colon(row['nationalinterestactioncode']),
        "number_of_actions": h.up2colon(row['numberofactions']),
        "number_of_offers_received": h.up2colon(row['numberofoffersreceived']),
        "performance_based_service_acquisition": h.up2colon(row['performancebasedservicecontract']),
        "place_of_manufacture": h.up2colon(row['placeofmanufacture']),
        "product_or_service_code": h.up2colon(row['productorservicecode']),
        "recovered_materials_sustainability": h.up2colon(row['recoveredmaterialclauses']),
        "research": h.up2colon(row['research']),
        "sea_transportation": h.up2colon(row['seatransportation']),
        "service_contract_act": h.up2colon(row['servicecontractact']),
        "small_business_competitiveness_demonstration_program": self.parse_first_character(
            row['smallbusinesscompetitivenessdemonstrationprogram']),
        "solicitation_procedures": h.up2colon(row['solicitationprocedures']),
        "subcontracting_plan": h.up2colon(row['subcontractplan']),
        "type_set_aside": h.up2colon(row['typeofsetaside']),
        "walsh_healey_act": h.up2colon(row['walshhealyact']),
        "rec_flag": self.parse_first_character(h.up2colon(row['rec_flag'])),
        "type_of_idc": self.parse_first_character(row['typeofidc']),
        "a76_fair_act_action": self.parse_first_character(row['a76action']),
        "clinger_cohen_act_planning": self.parse_first_character(row['clingercohenact']),
        "cost_accounting_standards": self.parse_first_character(
            row['costaccountingstandardsclause']),
        "fed_biz_opps": self.parse_first_character(row['fedbizopps']),
        "foreign_funding": self.parse_first_character(row['fundedbyforeignentity']),
        "major_program": self.parse_first_character(row['majorprogramcode']),
        "program_acronym": self.parse_first_character(row['programacronym']),
        "referenced_idv_modification_number": self.parse_first_character(
            row['idvmodificationnumber']),
        "transaction_number": self.parse_first_character(row['transactionnumber']),
        "solicitation_identifier": self.parse_first_character(row['solicitationid'])
    }