def insert_new_fpds(self, to_insert, total_rows):
    """
    Insert (or update) FPDS contract transactions from an iterable of row dicts.

    For every row this creates a recipient location, a LegalEntity, a place of performance location,
    gets or creates the summary Award, and then either updates the existing
    TransactionNormalized/TransactionFPDS pair matching ``detached_award_proc_unique`` or creates a new pair.
    The id of every touched award is appended to ``award_update_id_list`` (a name defined outside this
    function).

    :param to_insert: iterable of dict-like rows; each row is modified in place by the helpers called here
    :param total_rows: total number of rows, used only in the progress log message
    :return: None
    :raises ValueError: if ``row['last_modified']`` matches neither supported timestamp format
    """
    logger.info('Starting insertion of new FPDS data')

    place_of_performance_field_map = {
        "location_country_code": "place_of_perform_country_c",
        "country_name": "place_of_perf_country_desc",
        "state_code": "place_of_performance_state",
        "state_name": "place_of_perfor_state_desc",
        "city_name": "place_of_perform_city_name",
        "county_name": "place_of_perform_county_na",
        "county_code": "place_of_perform_county_co",
        "zip_4a": "place_of_performance_zip4a",
        "congressional_code": "place_of_performance_congr",
        "zip_last4": "place_of_perform_zip_last4",
        "zip5": "place_of_performance_zip5"
    }

    legal_entity_location_field_map = {
        "location_country_code": "legal_entity_country_code",
        "country_name": "legal_entity_country_name",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_descrip",
        "city_name": "legal_entity_city_name",
        "county_name": "legal_entity_county_name",
        "county_code": "legal_entity_county_code",
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "zip4": "legal_entity_zip4",
        "congressional_code": "legal_entity_congressional",
        "zip_last4": "legal_entity_zip_last4",
        "zip5": "legal_entity_zip5"
    }

    # Loop-invariant: model field name -> row key for the normalized transaction
    contract_field_map = {
        "type": "contract_award_type",
        "type_description": "contract_award_type_desc",
        "description": "award_description"
    }

    start_time = datetime.now()

    for index, row in enumerate(to_insert, 1):
        if not (index % 1000):
            logger.info(
                'Inserting Stale FPDS: Inserting row {} of {} ({})'.format(
                    str(index), str(total_rows), datetime.now() - start_time))

        upper_case_dict_values(row)

        # Create new LegalEntityLocation and LegalEntity from the row data
        legal_entity_location = create_location(
            legal_entity_location_field_map, row, {"recipient_flag": True, "is_fpds": True})
        recipient_name = row['awardee_or_recipient_legal']
        legal_entity = LegalEntity.objects.create(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name if recipient_name is not None else "")
        legal_entity_value_map = {
            "location": legal_entity_location,
            "business_categories": get_business_categories(row=row, data_type='fpds'),
            "is_fpds": True
        }
        set_legal_entity_boolean_fields(row)
        legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True)

        # Create the place of performance location
        pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True})

        # Find the toptier awards from the subtier awards
        awarding_agency = Agency.get_by_subtier_only(row["awarding_sub_tier_agency_c"])
        funding_agency = Agency.get_by_subtier_only(row["funding_sub_tier_agency_co"])

        # Generate the unique Award ID
        # "CONT_AW_" + agency_id + referenced_idv_agency_iden + piid + parent_award_id
        # (any empty/missing component is replaced by the '-NONE-' placeholder)
        generated_unique_id = 'CONT_AW_' + (row['agency_id'] or '-NONE-') + '_' + \
            (row['referenced_idv_agency_iden'] or '-NONE-') + '_' + \
            (row['piid'] or '-NONE-') + '_' + \
            (row['parent_award_id'] or '-NONE-')

        # Create the summary Award
        (_created, award) = Award.get_or_create_summary_award(
            generated_unique_award_id=generated_unique_id, piid=row['piid'])
        award.parent_award_piid = row.get('parent_award_id')
        award.save()

        # Append row to list of Awards updated
        award_update_id_list.append(award.id)

        # The timestamp may or may not carry fractional seconds
        try:
            last_mod_date = datetime.strptime(str(row['last_modified']), "%Y-%m-%d %H:%M:%S.%f").date()
        except ValueError:
            last_mod_date = datetime.strptime(str(row['last_modified']), "%Y-%m-%d %H:%M:%S").date()

        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": funding_agency,
            "recipient": legal_entity,
            "place_of_performance": pop_location,
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
            "action_date": format_date(row['action_date']),
            "last_modified_date": last_mod_date,
            "transaction_unique_id": row['detached_award_proc_unique'],
            "generated_unique_award_id": generated_unique_id,
            "is_fpds": True
        }

        transaction_normalized_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=contract_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        contract_instance = load_data_into_model(
            TransactionFPDS(),  # thrown away
            row,
            as_dict=True)

        detached_award_proc_unique = contract_instance['detached_award_proc_unique']
        unique_fpds = TransactionFPDS.objects.filter(detached_award_proc_unique=detached_award_proc_unique)

        # Fetch the existing record once: it is needed both for the existence check and for the id of
        # its normalized transaction, and a second fetch could return None if the row disappeared.
        existing_fpds = unique_fpds.first()

        if existing_fpds:
            transaction_normalized_dict["update_date"] = datetime.utcnow()
            transaction_normalized_dict["fiscal_year"] = fy(transaction_normalized_dict["action_date"])

            # update TransactionNormalized
            TransactionNormalized.objects.filter(id=existing_fpds.transaction.id).\
                update(**transaction_normalized_dict)

            # update TransactionFPDS
            unique_fpds.update(**contract_instance)
        else:
            # create TransactionNormalized
            transaction = TransactionNormalized(**transaction_normalized_dict)
            transaction.save()

            # create TransactionFPDS
            transaction_fpds = TransactionFPDS(transaction=transaction, **contract_instance)
            transaction_fpds.save()
def create_subaward(self, row, shared_award_mappings, award_type):
    """
    Build and insert one Subaward record from a broker row.

    The row is only inserted when its ``internal_id`` is a key of ``shared_award_mappings``; otherwise a
    log message is emitted and nothing is created (this method exists to satisfy codeclimate complexity
    issues in the caller).

    :param row: dict-like broker row; modified in place by ``upper_case_dict_values``
    :param shared_award_mappings: mapping of internal_id -> dict holding the prime award under 'award'
    :param award_type: value stored in the subaward's ``award_type`` column
    :return: None
    """
    # A missing mapping means one or more parts of the shared data could not be found, so skip the row.
    if row['internal_id'] not in shared_award_mappings:
        logger.info("[Internal ID {}] not in 'shared_award_mappings'".format(row["internal_id"]))
        return

    shared_mappings = shared_award_mappings[row['internal_id']]
    prime_award = shared_mappings['award']

    # Details inherited from the prime award's recipient, when there is one
    prime_award_dict = {}
    if prime_award:
        prime_recipient = prime_award.recipient
        prime_award_dict['prime_recipient'] = prime_recipient
        if prime_recipient:
            prime_award_dict['prime_recipient_name'] = prime_recipient.recipient_name
            prime_award_dict['business_categories'] = prime_recipient.business_categories or []

    upper_case_dict_values(row)

    # (subaward column, row key) pairs whose values are copied straight across; the key must be present
    required_columns = (
        ('recipient_unique_id', 'duns'),
        ('recipient_name', 'recipient_name'),
        ('dba_name', 'dba_name'),
        ('parent_recipient_unique_id', 'parent_duns'),
        ('recipient_location_country_code', 'recipient_location_country_code'),
        ('recipient_location_state_code', 'recipient_location_state_code'),
        ('recipient_location_state_name', 'recipient_location_state_name'),
        ('recipient_location_city_name', 'recipient_location_city_name'),
        ('recipient_location_zip4', 'recipient_location_zip4'),
        ('recipient_location_zip5', 'recipient_location_zip5'),
        ('recipient_location_street_address', 'recipient_location_street_address'),
        ('recipient_location_congressional_code', 'recipient_location_congressional_code'),
        ('officer_1_name', 'top_paid_fullname_1'),
        ('officer_1_amount', 'top_paid_amount_1'),
        ('officer_2_name', 'top_paid_fullname_2'),
        ('officer_2_amount', 'top_paid_amount_2'),
        ('officer_3_name', 'top_paid_fullname_3'),
        ('officer_3_amount', 'top_paid_amount_3'),
        ('officer_4_name', 'top_paid_fullname_4'),
        ('officer_4_amount', 'top_paid_amount_4'),
        ('officer_5_name', 'top_paid_fullname_5'),
        ('officer_5_amount', 'top_paid_amount_5'),
        ('subaward_number', 'subaward_num'),
        ('amount', 'subaward_amount'),
        ('description', 'description'),
        ('recovery_model_question1', 'q1_flag'),
        ('recovery_model_question2', 'q2_flag'),
        ('action_date', 'action_date'),
        ('award_report_fy_month', 'report_period_mon'),
        ('award_report_fy_year', 'report_period_year'),
        ('broker_award_id', 'id'),
        ('internal_id', 'internal_id'),
        ('pop_country_code', 'principle_place_country'),
        ('pop_state_code', 'principle_place_state'),
        ('pop_state_name', 'principle_place_state_name'),
        ('pop_city_name', 'principle_place_city'),
        ('pop_zip4', 'principle_place_zip'),
        ('pop_street_address', 'principle_place_street'),
        ('pop_congressional_code', 'principle_place_district'),
    )
    subaward_dict = {column: row[key] for column, key in required_columns}

    # Optional row values, prime-award details and constants.
    # Columns deliberately not set here:
    #   keyword_ts_vector, award_ts_vector, recipient_name_ts_vector, product_or_service_description,
    #   total_obl_bin -> left NULL, matview SQL will populate
    #   business_type_code, extent_competed -> always NULL
    subaward_dict.update({
        'parent_recipient_name': row.get('parent_recipient_name', None),
        'business_type_description': row.get('bus_types', None),
        'prime_recipient': prime_award_dict.get('prime_recipient', None),
        'prime_recipient_name': prime_award_dict.get('prime_recipient_name', None),
        'business_categories': prime_award_dict.get('business_categories', []),
        'data_source': "DBR",
        'award_type': award_type,
        'piid': row.get('piid', None),
        'fain': row.get('fain', None),
        'updated_at': datetime.now(timezone.utc),
    })

    funding_agency = None
    awarding_agency = None
    if prime_award:
        subaward_dict.update({
            "award_id": prime_award.id,
            "prime_award_type": prime_award.type,
            "last_modified_date": prime_award.last_modified_date,
            "latest_transaction_id": prime_award.latest_transaction_id,
            "business_categories": get_le_business_categories(prime_award.recipient_id),
        })
        funding_agency = get_agency_values(prime_award.funding_agency)
        awarding_agency = get_agency_values(prime_award.awarding_agency)

        contract_data = get_contract_fields(prime_award.latest_transaction_id)
        if contract_data:
            # 'product_or_service_description' is intentionally not set here
            subaward_dict.update({
                'pulled_from': contract_data['pulled_from'],
                'product_or_service_code': contract_data['product_or_service_code'],
                'type_of_contract_pricing': contract_data['type_of_contract_pricing'],
                'type_set_aside': contract_data['type_set_aside'],
            })

    if funding_agency:
        subaward_dict.update({
            "funding_agency_id": funding_agency["agency_id"],
            "funding_toptier_agency_abbreviation": funding_agency["toptier_agency_abbreviation"],
            "funding_toptier_agency_name": funding_agency["toptier_agency_name"],
            "funding_subtier_agency_abbreviation": funding_agency["subtier_agency_abbreviation"],
            "funding_subtier_agency_name": funding_agency["subtier_agency_name"],
        })

    if awarding_agency:
        subaward_dict.update({
            "awarding_agency_id": awarding_agency["agency_id"],
            "awarding_toptier_agency_abbreviation": awarding_agency["toptier_agency_abbreviation"],
            "awarding_toptier_agency_name": awarding_agency["toptier_agency_name"],
            "awarding_subtier_agency_abbreviation": awarding_agency["subtier_agency_abbreviation"],
            "awarding_subtier_agency_name": awarding_agency["subtier_agency_name"],
        })

    # Only the first space-separated token of 'cfda_numbers' is used for the lookup
    cfda = None
    cfda_numbers = row.get('cfda_numbers')
    if cfda_numbers:
        program_number = cfda_numbers.split(' ')[0]
        cfda = Cfda.objects.filter(program_number=program_number).first()

    if cfda:
        subaward_dict.update({
            "cfda_number": cfda.program_number,
            "cfda_title": cfda.program_title,
            "cfda_id": cfda.pk,
        })

    subaward_dict['pop_country_name'] = get_country_name_from_code(row['principle_place_country'])
    subaward_dict['recipient_location_country_name'] = get_country_name_from_code(
        row['recipient_location_country_code'])

    pop_lookup = get_city_and_county_from_state(row['principle_place_state'], row['principle_place_city'])
    subaward_dict['pop_county_code'] = pop_lookup.get("county_code")
    subaward_dict['pop_county_name'] = pop_lookup.get("county_name")
    subaward_dict['pop_city_code'] = pop_lookup.get("city_code")

    recipient_lookup = get_city_and_county_from_state(
        row['recipient_location_state_code'], row['recipient_location_city_name'])
    subaward_dict['recipient_location_county_code'] = recipient_lookup.get("county_code")
    subaward_dict['recipient_location_county_name'] = recipient_lookup.get("county_name")
    subaward_dict['recipient_location_city_code'] = recipient_lookup.get("city_code")

    # Either we're starting with an empty table in regards to this award type or we've deleted all
    # subawards related to the internal_id, either way we just create the subaward
    Subaward.objects.create(**subaward_dict)

    if prime_award:
        award_update_id_list.add(prime_award.id)
def insert_new_fpds(self, to_insert, total_rows):
    """
    Insert (or update) FPDS contract transactions from an iterable of row dicts.

    For every row this creates a recipient location, a LegalEntity, a place of performance location,
    gets or creates the summary Award, then either updates the existing
    TransactionNormalized/TransactionFPDS pair matching ``detached_award_proc_unique`` or creates a new pair,
    and finally stamps the LegalEntity with the transaction's unique id. The id of every touched award is
    appended to ``AWARD_UPDATE_ID_LIST`` (a name defined outside this function).

    :param to_insert: iterable of dict-like rows; each row is modified in place by the helpers called here
    :param total_rows: not used by this implementation; kept so existing callers keep working
    :return: None
    :raises ValueError: if ``row['last_modified']`` matches neither supported timestamp format
    """
    place_of_performance_field_map = {
        "location_country_code": "place_of_perform_country_c",
        "country_name": "place_of_perf_country_desc",
        "state_code": "place_of_performance_state",
        "state_name": "place_of_perfor_state_desc",
        "city_name": "place_of_perform_city_name",
        "county_name": "place_of_perform_county_na",
        "county_code": "place_of_perform_county_co",
        "zip_4a": "place_of_performance_zip4a",
        "congressional_code": "place_of_performance_congr",
        "zip_last4": "place_of_perform_zip_last4",
        "zip5": "place_of_performance_zip5",
    }

    legal_entity_location_field_map = {
        "location_country_code": "legal_entity_country_code",
        "country_name": "legal_entity_country_name",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_descrip",
        "city_name": "legal_entity_city_name",
        "county_name": "legal_entity_county_name",
        "county_code": "legal_entity_county_code",
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "zip4": "legal_entity_zip4",
        "congressional_code": "legal_entity_congressional",
        "zip_last4": "legal_entity_zip_last4",
        "zip5": "legal_entity_zip5",
    }

    # Loop-invariant: model field name -> row key for the normalized transaction
    contract_field_map = {
        "type": "contract_award_type",
        "type_description": "contract_award_type_desc",
        "description": "award_description",
    }

    for row in to_insert:
        upper_case_dict_values(row)

        # Create new LegalEntityLocation and LegalEntity from the row data
        legal_entity_location = create_location(
            legal_entity_location_field_map, row, {"recipient_flag": True, "is_fpds": True})
        recipient_name = row["awardee_or_recipient_legal"]
        legal_entity = LegalEntity.objects.create(
            recipient_unique_id=row["awardee_or_recipient_uniqu"],
            recipient_name=recipient_name if recipient_name is not None else "",
        )
        legal_entity_value_map = {
            "location": legal_entity_location,
            "business_categories": get_business_categories(row=row, data_type="fpds"),
            "is_fpds": True,
        }
        set_legal_entity_boolean_fields(row)
        legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True)

        # Create the place of performance location
        pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True})

        # Find the toptier awards from the subtier awards
        awarding_agency = Agency.get_by_subtier_only(row["awarding_sub_tier_agency_c"])
        funding_agency = Agency.get_by_subtier_only(row["funding_sub_tier_agency_co"])

        # Generate the unique Award ID
        # "CONT_AW_" + agency_id + referenced_idv_agency_iden + piid + parent_award_id
        # (any empty/missing component is replaced by the "-NONE-" placeholder)
        generated_unique_id = (
            "CONT_AW_"
            + (row["agency_id"] or "-NONE-")
            + "_"
            + (row["referenced_idv_agency_iden"] or "-NONE-")
            + "_"
            + (row["piid"] or "-NONE-")
            + "_"
            + (row["parent_award_id"] or "-NONE-")
        )

        # Create the summary Award
        (_created, award) = Award.get_or_create_summary_award(
            generated_unique_award_id=generated_unique_id, piid=row["piid"])
        award.parent_award_piid = row.get("parent_award_id")
        award.save()

        # Append row to list of Awards updated
        AWARD_UPDATE_ID_LIST.append(award.id)

        # The timestamp may or may not carry fractional seconds
        try:
            last_mod_date = datetime.strptime(str(row["last_modified"]), "%Y-%m-%d %H:%M:%S.%f").date()
        except ValueError:
            last_mod_date = datetime.strptime(str(row["last_modified"]), "%Y-%m-%d %H:%M:%S").date()

        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": funding_agency,
            "recipient": legal_entity,
            "place_of_performance": pop_location,
            "period_of_performance_start_date": format_date(row["period_of_performance_star"]),
            "period_of_performance_current_end_date": format_date(row["period_of_performance_curr"]),
            "action_date": format_date(row["action_date"]),
            "last_modified_date": last_mod_date,
            "transaction_unique_id": row["detached_award_proc_unique"],
            "generated_unique_award_id": generated_unique_id,
            "is_fpds": True,
        }

        transaction_normalized_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=contract_field_map,
            value_map=parent_txn_value_map,
            as_dict=True,
        )

        contract_instance = load_data_into_model(TransactionFPDS(), row, as_dict=True)  # thrown away

        detached_award_proc_unique = contract_instance["detached_award_proc_unique"]
        unique_fpds = TransactionFPDS.objects.filter(detached_award_proc_unique=detached_award_proc_unique)

        # Fetch the existing record once: it is needed both for the existence check and for the id of
        # its normalized transaction, and a second fetch could return None if the row disappeared.
        existing_fpds = unique_fpds.first()

        if existing_fpds:
            transaction_normalized_dict["update_date"] = datetime.now(timezone.utc)
            transaction_normalized_dict["fiscal_year"] = fy(transaction_normalized_dict["action_date"])

            # update TransactionNormalized
            TransactionNormalized.objects.filter(id=existing_fpds.transaction.id).update(
                **transaction_normalized_dict)

            # update TransactionFPDS
            unique_fpds.update(**contract_instance)
        else:
            # create TransactionNormalized
            transaction = TransactionNormalized(**transaction_normalized_dict)
            transaction.save()

            # create TransactionFPDS
            transaction_fpds = TransactionFPDS(transaction=transaction, **contract_instance)
            transaction_fpds.save()

        # Update legal entity to map back to transaction
        legal_entity.transaction_unique_id = detached_award_proc_unique
        legal_entity.save()
def create_subaward(self, row, shared_award_mappings, award_type):
    """
    Build and insert one Subaward record from a broker row.

    Nothing happens unless the row's ``internal_id`` is a key of ``shared_award_mappings`` (this method
    exists to satisfy codeclimate complexity issues in the caller).

    :param row: dict-like broker row; modified in place by ``upper_case_dict_values``
    :param shared_award_mappings: mapping of internal_id -> dict holding the prime award under 'award'
    :param award_type: 'procurement' selects the D1 recipient mapping; any other value selects the D2 one
    :return: None
    """
    # A missing mapping means one or more parts of the shared data could not be found, so skip the row.
    if row['internal_id'] not in shared_award_mappings:
        return

    def _expand_zip(location_values):
        # Split a non-empty 'location_zip' into zip4/zip5/zip_last4, then drop the source key.
        full_zip = location_values["location_zip"]
        if full_zip:
            location_values["zip4"] = full_zip
            location_values["zip5"] = full_zip[:5]
            location_values["zip_last4"] = full_zip[5:]
        location_values.pop("location_zip")

    shared_mappings = shared_award_mappings[row['internal_id']]
    prime_award = shared_mappings['award']

    # Details inherited from the prime award's recipient, when there is one
    prime_award_dict = {}
    if prime_award:
        prime_recipient = prime_award.recipient
        prime_award_dict['prime_recipient'] = prime_recipient
        if prime_recipient:
            prime_award_dict['prime_recipient_name'] = prime_recipient.recipient_name
            prime_award_dict['business_categories'] = prime_recipient.business_categories or []

    upper_case_dict_values(row)

    # check if the key exists and if it isn't empty (only here for grants);
    # only the first space-separated token is used for the lookup
    cfda = None
    cfda_numbers = row.get('cfda_numbers')
    if cfda_numbers:
        cfda = Cfda.objects.filter(program_number=cfda_numbers.split(' ')[0]).first()

    business_type_code = None
    if award_type == 'procurement':
        le_location_map = location_d1_recipient_mapper(row)
        recipient_name = row['company_name']
        parent_recipient_name = row['parent_company_name']
        business_types_description = row['bus_types']
    else:
        le_location_map = location_d2_recipient_mapper(row)
        recipient_name = row['awardee_name']
        parent_recipient_name = None
        business_types_description = None

    _expand_zip(le_location_map)
    recipient_location = Location(**le_location_map)
    recipient_location.pre_save()

    pop_value_map = pop_mapper(row)
    pop_value_map['place_of_performance_flag'] = True
    _expand_zip(pop_value_map)
    place_of_performance = Location(**pop_value_map)
    place_of_performance.pre_save()

    # Fall back to the recipient lookup table for the parent's name when the row does not supply one
    if not parent_recipient_name and row.get('parent_duns'):
        duns_obj = (
            RecipientLookup.objects
            .filter(duns=row['parent_duns'], legal_business_name__isnull=False)
            .values('legal_business_name')
            .first()
        )
        if duns_obj:
            parent_recipient_name = duns_obj['legal_business_name']

    awarding_agency = prime_award.awarding_agency if prime_award else None
    funding_agency = prime_award.funding_agency if prime_award else None

    # Award linkage and recipient identity
    subaward_dict = {
        'award': prime_award,
        'recipient_unique_id': row['duns'],
        'recipient_name': recipient_name,
        'dba_name': row['dba_name'],
        'parent_recipient_unique_id': row['parent_duns'],
        'parent_recipient_name': parent_recipient_name,
        'business_type_code': business_type_code,
        'business_type_description': business_types_description,
        'prime_recipient': prime_award_dict.get('prime_recipient', None),
        'prime_recipient_name': prime_award_dict.get('prime_recipient_name', None),
        'business_categories': prime_award_dict.get('business_categories', []),
    }

    # Recipient location, taken from the Location instance after pre_save()
    subaward_dict.update({
        'recipient_location_country_code': recipient_location.location_country_code,
        'recipient_location_country_name': recipient_location.country_name,
        'recipient_location_state_code': recipient_location.state_code,
        'recipient_location_state_name': recipient_location.state_name,
        'recipient_location_county_code': recipient_location.county_code,
        'recipient_location_county_name': recipient_location.county_name,
        'recipient_location_city_code': recipient_location.city_code,
        'recipient_location_city_name': recipient_location.city_name,
        'recipient_location_zip4': recipient_location.zip4,
        'recipient_location_zip5': recipient_location.zip5,
        'recipient_location_street_address': recipient_location.address_line1,
        'recipient_location_congressional_code': recipient_location.congressional_code,
        'recipient_location_foreign_postal_code': recipient_location.foreign_postal_code,
    })

    # Values copied straight from the row: (subaward column, row key)
    copied_columns = (
        ('officer_1_name', 'top_paid_fullname_1'),
        ('officer_1_amount', 'top_paid_amount_1'),
        ('officer_2_name', 'top_paid_fullname_2'),
        ('officer_2_amount', 'top_paid_amount_2'),
        ('officer_3_name', 'top_paid_fullname_3'),
        ('officer_3_amount', 'top_paid_amount_3'),
        ('officer_4_name', 'top_paid_fullname_4'),
        ('officer_4_amount', 'top_paid_amount_4'),
        ('officer_5_name', 'top_paid_fullname_5'),
        ('officer_5_amount', 'top_paid_amount_5'),
        ('subaward_number', 'subaward_num'),
        ('amount', 'subaward_amount'),
        ('description', 'overall_description'),
        ('recovery_model_question1', 'q1_flag'),
        ('recovery_model_question2', 'q2_flag'),
        ('action_date', 'subaward_date'),
        ('award_report_fy_month', 'report_period_mon'),
        ('award_report_fy_year', 'report_period_year'),
        ('broker_award_id', 'id'),
        ('internal_id', 'internal_id'),
        ('pop_country_code', 'principle_place_country'),
        ('pop_state_code', 'principle_place_state'),
        ('pop_state_name', 'principle_place_state_name'),
        ('pop_city_name', 'principle_place_city'),
        ('pop_zip4', 'principle_place_zip'),
        ('pop_street_address', 'principle_place_street'),
        ('pop_congressional_code', 'principle_place_district'),
    )
    for column, key in copied_columns:
        subaward_dict[column] = row[key]

    # Constants, agencies and place of performance values derived by Location.pre_save()
    subaward_dict.update({
        'data_source': "DBR",
        'cfda': cfda,
        'awarding_agency': awarding_agency,
        'funding_agency': funding_agency,
        'award_type': award_type,
        'pop_country_name': place_of_performance.country_name,
        'pop_county_code': place_of_performance.county_code,
        'pop_county_name': place_of_performance.county_name,
        'pop_city_code': place_of_performance.city_code,
        'updated_at': datetime.utcnow(),
    })

    # Either we're starting with an empty table in regards to this award type or we've deleted all
    # subawards related to the internal_id, either way we just create the subaward
    Subaward.objects.create(**subaward_dict)

    if prime_award:
        award_update_id_list.append(prime_award.id)
def insert_new_fabs(self, to_insert, total_rows):
    """
    Insert (or update) FABS financial assistance transactions from an iterable of row dicts.

    For every row this creates a recipient location, a LegalEntity, a place of performance location,
    gets or creates the summary Award, then either updates the existing
    TransactionNormalized/TransactionFABS pair matching ``afa_generated_unique`` or creates a new pair,
    and finally stamps the LegalEntity with the transaction's unique id. The id of every touched award is
    appended to ``award_update_id_list`` (a name defined outside this function).

    :param to_insert: iterable of dict-like rows; each row is modified in place by the helpers called here
    :param total_rows: total number of rows, used only in the progress log message
    :return: None
    :raises ValueError: if ``row['record_type']`` is not numeric, or ``row['modified_at']`` matches
        neither supported timestamp format
    :raises Exception: if the record type is not 1, 2 or 3
    """
    logger.info('Starting insertion of new FABS data')

    place_of_performance_field_map = {
        "location_country_code": "place_of_perform_country_c",
        "country_name": "place_of_perform_country_n",
        "state_code": "place_of_perfor_state_code",
        "state_name": "place_of_perform_state_nam",
        "city_name": "place_of_performance_city",
        "county_name": "place_of_perform_county_na",
        "county_code": "place_of_perform_county_co",
        "foreign_location_description": "place_of_performance_forei",
        "zip_4a": "place_of_performance_zip4a",
        "congressional_code": "place_of_performance_congr",
        "performance_code": "place_of_performance_code",
        "zip_last4": "place_of_perform_zip_last4",
        "zip5": "place_of_performance_zip5"
    }

    legal_entity_location_field_map = {
        "location_country_code": "legal_entity_country_code",
        "country_name": "legal_entity_country_name",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_name",
        "city_name": "legal_entity_city_name",
        "city_code": "legal_entity_city_code",
        "county_name": "legal_entity_county_name",
        "county_code": "legal_entity_county_code",
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "foreign_location_description": "legal_entity_foreign_descr",
        "congressional_code": "legal_entity_congressional",
        "zip_last4": "legal_entity_zip_last4",
        "zip5": "legal_entity_zip5",
        "foreign_postal_code": "legal_entity_foreign_posta",
        "foreign_province": "legal_entity_foreign_provi",
        "foreign_city_name": "legal_entity_foreign_city"
    }

    # Loop-invariant: model field name -> row key for the normalized transaction
    fad_field_map = {
        "type": "assistance_type",
        "description": "award_description",
    }

    start_time = datetime.now()

    for index, row in enumerate(to_insert, 1):
        if not (index % 1000):
            logger.info(
                'Inserting Stale FABS: Inserting row {} of {} ({})'.format(
                    str(index), str(total_rows), datetime.now() - start_time))

        upper_case_dict_values(row)

        # Create new LegalEntityLocation and LegalEntity from the row data
        legal_entity_location = create_location(legal_entity_location_field_map, row, {"recipient_flag": True})
        recipient_name = row['awardee_or_recipient_legal']
        legal_entity = LegalEntity.objects.create(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name if recipient_name is not None else "",
            parent_recipient_unique_id=row['ultimate_parent_unique_ide'])
        legal_entity_value_map = {
            "location": legal_entity_location,
            "business_categories": get_business_categories(row=row, data_type='fabs'),
            "business_types_description": row['business_types_desc']
        }
        legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True)

        # Create the place of performance location
        pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True})

        # Find the toptier awards from the subtier awards
        awarding_agency = Agency.get_by_subtier_only(row["awarding_sub_tier_agency_c"])
        funding_agency = Agency.get_by_subtier_only(row["funding_sub_tier_agency_co"])

        # Generate the unique Award ID
        # "ASST_AW_" + awarding_sub_tier_agency_c + fain + uri

        # this will raise an exception if the cast to an int fails, that's ok since we don't want to process
        # non-numeric record type values
        record_type_int = int(row['record_type'])
        if record_type_int == 1:
            # record type 1 is keyed on the URI only
            uri = row['uri'] or '-NONE-'
            fain = '-NONE-'
        elif record_type_int in (2, 3):
            # record types 2 and 3 are keyed on the FAIN only
            uri = '-NONE-'
            fain = row['fain'] or '-NONE-'
        else:
            raise Exception(
                'Invalid record type encountered for the following afa_generated_unique record: %s' %
                row['afa_generated_unique'])

        generated_unique_id = 'ASST_AW_' + \
            (row['awarding_sub_tier_agency_c'] or '-NONE-') + '_' + \
            fain + '_' + uri

        # Create the summary Award
        (_created, award) = Award.get_or_create_summary_award(
            generated_unique_award_id=generated_unique_id,
            fain=row['fain'],
            uri=row['uri'],
            record_type=row['record_type'])
        award.save()

        # Append row to list of Awards updated
        award_update_id_list.append(award.id)

        # The timestamp may or may not carry fractional seconds
        try:
            last_mod_date = datetime.strptime(str(row['modified_at']), "%Y-%m-%d %H:%M:%S.%f").date()
        except ValueError:
            last_mod_date = datetime.strptime(str(row['modified_at']), "%Y-%m-%d %H:%M:%S").date()

        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": funding_agency,
            "recipient": legal_entity,
            "place_of_performance": pop_location,
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
            "action_date": format_date(row['action_date']),
            "last_modified_date": last_mod_date,
            "type_description": row['assistance_type_desc'],
            "transaction_unique_id": row['afa_generated_unique'],
            "generated_unique_award_id": generated_unique_id
        }

        transaction_normalized_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=fad_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        financial_assistance_data = load_data_into_model(
            TransactionFABS(),  # thrown away
            row,
            as_dict=True)

        afa_generated_unique = financial_assistance_data['afa_generated_unique']
        unique_fabs = TransactionFABS.objects.filter(afa_generated_unique=afa_generated_unique)

        # Fetch the existing record once: it is needed both for the existence check and for the id of
        # its normalized transaction, and a second fetch could return None if the row disappeared.
        existing_fabs = unique_fabs.first()

        if existing_fabs:
            transaction_normalized_dict["update_date"] = datetime.utcnow()
            transaction_normalized_dict["fiscal_year"] = fy(transaction_normalized_dict["action_date"])

            # Update TransactionNormalized
            TransactionNormalized.objects.filter(id=existing_fabs.transaction.id).\
                update(**transaction_normalized_dict)

            # Update TransactionFABS
            unique_fabs.update(**financial_assistance_data)
        else:
            # Create TransactionNormalized
            transaction = TransactionNormalized(**transaction_normalized_dict)
            transaction.save()

            # Create TransactionFABS
            transaction_fabs = TransactionFABS(transaction=transaction, **financial_assistance_data)
            transaction_fabs.save()

        # Update legal entity to map back to transaction
        legal_entity.transaction_unique_id = afa_generated_unique
        legal_entity.save()
def create_subaward(self, row, shared_award_mappings, award_type):
    """
    Build and insert one Subaward record from a broker row.

    Nothing happens unless the row's ``internal_id`` is a key of ``shared_award_mappings`` (this method
    exists to satisfy codeclimate complexity issues in the caller).

    :param row: dict-like broker row; modified in place by ``upper_case_dict_values``
    :param shared_award_mappings: mapping of internal_id -> dict holding the prime award under 'award'
    :param award_type: stored on the subaward and passed to ``self.get_subaward_references``
    :return: None
    """
    # A missing mapping means one or more parts of the shared data could not be found, so skip the row.
    if row['internal_id'] not in shared_award_mappings:
        return

    shared_mappings = shared_award_mappings[row['internal_id']]
    prime_award = shared_mappings['award']

    upper_case_dict_values(row)

    # check if the key exists and if it isn't empty (only here for grants);
    # only the first space-separated token is used for the lookup
    cfda = None
    has_cfda_numbers = 'cfda_numbers' in row and row['cfda_numbers']
    if has_cfda_numbers:
        program_number = row['cfda_numbers'].split(' ')[0]
        cfda = Cfda.objects.filter(program_number=program_number).first()

    recipient, place_of_performance = self.get_subaward_references(row, award_type)

    # Agencies are inherited from the prime award when there is one
    awarding_agency = prime_award.awarding_agency if prime_award else None
    funding_agency = prime_award.funding_agency if prime_award else None

    subaward_dict = dict(
        award=prime_award,
        recipient=recipient,
        data_source="DBR",
        cfda=cfda,
        awarding_agency=awarding_agency,
        funding_agency=funding_agency,
        place_of_performance=place_of_performance,
        subaward_number=row['subaward_num'],
        amount=row['subaward_amount'],
        description=row['overall_description'],
        recovery_model_question1=row['q1_flag'],
        recovery_model_question2=row['q2_flag'],
        action_date=row['subaward_date'],
        award_report_fy_month=row['report_period_mon'],
        award_report_fy_year=row['report_period_year'],
        broker_award_id=row['id'],
        internal_id=row['internal_id'],
        award_type=award_type,
    )

    # Either we're starting with an empty table in regards to this award type or we've deleted all
    # subawards related to the internal_id, either way we just create the subaward
    Subaward.objects.create(**subaward_dict)

    if prime_award:
        award_update_id_list.append(prime_award.id)
def load_file_c(submission_attributes, db_cursor, award_financial_frame):
    """
    Process and load file C broker data.

    Note: this should run AFTER the D1 and D2 files are loaded because we try to join to those records to retrieve some
    additional information about the awarding sub-tier agency.

    :param submission_attributes: submission the loaded rows are attached to; its reporting period start/end
        are copied onto every record
    :param db_cursor: cursor passed through to the treasury account (TAS) lookup
    :param award_financial_frame: pandas DataFrame of file C rows; four helper columns ('txn',
        'awarding_agency', 'object_class', 'program_activity') are added to it in place
    :return: None when the frame is empty; otherwise a list with the id of the award of every loaded row
        (one entry per row, so ids can repeat)
    """
    if not award_financial_frame.size:
        logger.warning('No File C (award financial) data found, skipping...')
        return

    # this matches the file b reverse directive, but am repeating it here to ensure that we don't overwrite it as we
    # change up the order of file loading
    reverse = re.compile(r'(_(cpe|fyb)$)|^transaction_obligated_amount$')

    # dictionary to capture TAS that were skipped and some metadata
    # tas = top-level key
    # count = number of rows skipped
    # rows = row numbers skipped, corresponding to the original row numbers in the file that was submitted
    skipped_tas = {}

    award_financial_frame['txn'] = award_financial_frame.apply(get_award_financial_transaction, axis=1)
    award_financial_frame['awarding_agency'] = award_financial_frame.apply(get_awarding_agency, axis=1)
    award_financial_frame['object_class'] = award_financial_frame.apply(
        get_or_create_object_class_rw, axis=1, logger=logger)
    award_financial_frame['program_activity'] = award_financial_frame.apply(
        get_or_create_program_activity, axis=1, submission_attributes=submission_attributes)

    total_rows = award_financial_frame.shape[0]
    start_time = datetime.now()
    awards_touched = []

    # NaN cells become None so the row dicts carry proper nulls
    rows = award_financial_frame.replace({np.nan: None}).to_dict(orient='records')
    for index, row in enumerate(rows, 1):
        if not (index % 100):
            logger.info('C File Load: Loading row {} of {} ({})'.format(
                str(index), str(total_rows), datetime.now() - start_time))

        upper_case_dict_values(row)

        # Check and see if there is an entry for this TAS
        treasury_account = get_treasury_appropriation_account_tas_lookup(row.get('tas_id'), db_cursor)
        if treasury_account is None:
            update_skipped_tas(row, skipped_tas)
            continue

        # Find a matching transaction record, so we can use its subtier agency information to match to (or create) an
        # Award record.

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        _created, award = get_or_create_summary_award(
            awarding_agency=row['awarding_agency'],
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'),
            parent_award_id=row.get('parent_award_id'))
        awards_touched.append(award)

        award_financial_data = FinancialAccountsByAwards()

        value_map_faba = {
            'award': award,
            'submission': submission_attributes,
            'reporting_period_start': submission_attributes.reporting_period_start,
            'reporting_period_end': submission_attributes.reporting_period_end,
            'treasury_account': treasury_account,
            'object_class': row.get('object_class'),
            'program_activity': row.get('program_activity'),
        }

        # Still using the cpe|fyb regex compiled above for reverse
        load_data_into_model(award_financial_data, row, value_map=value_map_faba, save=True, reverse=reverse)

    awards_cache.clear()

    for key in skipped_tas:
        logger.info('Skipped %d rows due to missing TAS: %s', skipped_tas[key]['count'], key)

    total_tas_skipped = sum(skipped['count'] for skipped in skipped_tas.values())

    logger.info('Skipped a total of {} TAS rows for File C'.format(total_tas_skipped))

    # Return the award ids themselves: the loop variable is the award, the element is its id.
    return [award.id for award in awards_touched]