def test_txn_contract_get_or_create():
    """Verify TransactionFPDS.get_or_create_2: it must update the existing
    FPDS record in place for a known transaction and insert a fresh record
    for a new transaction."""
    agency = mommy.make("references.Agency")
    award = mommy.make("awards.Award", awarding_agency=agency)
    txn = mommy.make(
        "awards.TransactionNormalized",
        award=award,
        modification_number="1",
        awarding_agency=agency,
        last_modified_date=date(2012, 3, 1),
    )
    mommy.make("awards.TransactionFPDS", transaction=txn, piid="abc", base_and_all_options_value=1000)
    assert TransactionFPDS.objects.all().count() == 1

    # Same transaction: the existing TransactionFPDS row must be updated,
    # not duplicated.
    refreshed = TransactionFPDS.get_or_create_2(txn, piid="abc", base_and_all_options_value=5000)
    refreshed.save()
    assert TransactionFPDS.objects.all().count() == 1
    normalized = TransactionNormalized.objects.get(id=txn.id)
    assert normalized.contract_data.piid == "abc"
    assert normalized.contract_data.base_and_all_options_value == "5000"

    # A brand-new transaction gets its own TransactionFPDS row.
    brand_new = TransactionFPDS.get_or_create_2(
        mommy.make("awards.TransactionNormalized"), piid="xyz", base_and_all_options_value=5555)
    brand_new.save()
    assert TransactionFPDS.objects.all().count() == 2
def load_transaction_fpds(self, fpds_broker_data, total_rows):
    """Materialize TransactionFPDS instances from broker rows and bulk-insert
    them, pairing each with its positionally-matching normalized transaction."""
    logger.info('Starting bulk loading for FPDS data')
    load_start = datetime.now()
    for row_number, broker_row in enumerate(fpds_broker_data, 1):
        # Emit a progress line every 10,000 rows.
        if row_number % 10000 == 0:
            elapsed = datetime.now() - load_start
            logger.info('Transaction FPDS: Loading row {} of {} ({})'.format(
                str(row_number), str(total_rows), elapsed))
        field_values = load_data_into_model(
            TransactionFPDS(),  # thrown away
            broker_row,
            as_dict=True)
        fpds_record = TransactionFPDS(**field_values)
        # Rows are assumed aligned index-for-index with
        # transaction_normalized_bulk, which is built elsewhere.
        fpds_record.transaction = transaction_normalized_bulk[row_number - 1]
        fpds_bulk.append(fpds_record)
    logger.info('Bulk creating Transaction FPDS (batch_size: {})...'.format(BATCH_SIZE))
    TransactionFPDS.objects.bulk_create(fpds_bulk, batch_size=BATCH_SIZE)
def test_txn_contract_get_or_create():
    """Test TransactionFPDS.get_or_create_2 method."""
    # Seed one award/transaction pair with an FPDS (contract) record.
    agency1 = mommy.make('references.Agency')
    awd1 = mommy.make('awards.Award', awarding_agency=agency1)
    txn1 = mommy.make(
        'awards.TransactionNormalized',
        award=awd1,
        modification_number='1',
        awarding_agency=agency1,
        last_modified_date=date(2012, 3, 1),
    )
    mommy.make('awards.TransactionFPDS', transaction=txn1, piid='abc',
               base_and_all_options_value=1000)
    assert TransactionFPDS.objects.all().count() == 1

    # an updated transaction should also update existing TransactionFPDS
    # data, not insert a new record
    tc_dict = {
        'piid': 'abc',
        'base_and_all_options_value': 5000,
    }
    tc2 = TransactionFPDS.get_or_create_2(txn1, **tc_dict)
    tc2.save()
    assert TransactionFPDS.objects.all().count() == 1
    t = TransactionNormalized.objects.get(id=txn1.id)
    assert t.contract_data.piid == 'abc'
    # Model field stores the value as a string.
    assert t.contract_data.base_and_all_options_value == '5000'

    # a new transaction gets a new TransactionFPDS record
    tc_dict = {
        'piid': 'xyz',
        'base_and_all_options_value': 5555,
    }
    tc3 = TransactionFPDS.get_or_create_2(
        mommy.make('awards.TransactionNormalized'), **tc_dict)
    tc3.save()
    assert TransactionFPDS.objects.all().count() == 2
def load_transaction_fpds(self, fpds_broker_data, total_rows):
    """Build TransactionFPDS rows from broker data and bulk-insert them.

    fpds_broker_data: iterable of broker row dicts.
    total_rows: row count, used only for progress logging.
    """
    logger.info('Starting bulk loading for FPDS data')
    start_time = datetime.now()
    for index, row in enumerate(fpds_broker_data, 1):
        # Progress log every 10,000 rows.
        if not (index % 10000):
            logger.info(
                'Transaction FPDS: Loading row {} of {} ({})'.format(
                    str(index), str(total_rows), datetime.now() - start_time))
        fpds_instance_data = load_data_into_model(
            TransactionFPDS(),  # thrown away
            row,
            as_dict=True)
        fpds_instance = TransactionFPDS(**fpds_instance_data)
        # Assumes rows align positionally with transaction_normalized_bulk,
        # which is built elsewhere — TODO confirm against caller.
        fpds_instance.transaction = transaction_normalized_bulk[index - 1]
        fpds_bulk.append(fpds_instance)
    logger.info(
        'Bulk creating Transaction FPDS (batch_size: {})...'.format(
            BATCH_SIZE))
    TransactionFPDS.objects.bulk_create(fpds_bulk, batch_size=BATCH_SIZE)
def insert_new_fpds(self, to_insert, total_rows):
    """Insert or update FPDS transactions from broker rows.

    For each row: upper-cases string values, creates recipient and
    place-of-performance Location/LegalEntity records, finds agencies,
    creates or fetches the summary Award, then either updates the existing
    TransactionNormalized/TransactionFPDS pair (matched on
    detached_award_proc_unique) or creates a new pair.

    to_insert: iterable of broker row dicts.
    total_rows: row count, used only for progress logging.
    """
    from datetime import timezone  # needed for the aware-timestamp fix below

    logger.info('Starting insertion of new FPDS data')
    # Location model field -> broker column name.
    place_of_performance_field_map = {
        "location_country_code": "place_of_perform_country_c",
        "country_name": "place_of_perf_country_desc",
        "state_code": "place_of_performance_state",
        "state_name": "place_of_perfor_state_desc",
        "city_name": "place_of_perform_city_name",
        "county_name": "place_of_perform_county_na",
        "county_code": "place_of_perform_county_co",
        "zip_4a": "place_of_performance_zip4a",
        "congressional_code": "place_of_performance_congr",
        "zip_last4": "place_of_perform_zip_last4",
        "zip5": "place_of_performance_zip5"
    }
    legal_entity_location_field_map = {
        "location_country_code": "legal_entity_country_code",
        "country_name": "legal_entity_country_name",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_descrip",
        "city_name": "legal_entity_city_name",
        "county_name": "legal_entity_county_name",
        "county_code": "legal_entity_county_code",
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "zip4": "legal_entity_zip4",
        "congressional_code": "legal_entity_congressional",
        "zip_last4": "legal_entity_zip_last4",
        "zip5": "legal_entity_zip5"
    }
    start_time = datetime.now()
    for index, row in enumerate(to_insert, 1):
        # Progress log every 1,000 rows.
        if not (index % 1000):
            logger.info('Inserting Stale FPDS: Inserting row {} of {} ({})'.format(
                str(index), str(total_rows), datetime.now() - start_time))

        # Normalize: upper-case every string value in the row.
        for key in row:
            if isinstance(row[key], str):
                row[key] = row[key].upper()

        # Create new LegalEntityLocation and LegalEntity from the row data
        legal_entity_location = create_location(
            legal_entity_location_field_map, row,
            {"recipient_flag": True, "is_fpds": True})
        recipient_name = row['awardee_or_recipient_legal']
        legal_entity = LegalEntity.objects.create(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name if recipient_name is not None else "")
        legal_entity_value_map = {
            "location": legal_entity_location,
            "business_categories": get_business_categories(row=row, data_type='fpds'),
            "is_fpds": True
        }
        set_legal_entity_boolean_fields(row)
        legal_entity = load_data_into_model(
            legal_entity, row, value_map=legal_entity_value_map, save=True)

        # Create the place of performance location
        pop_location = create_location(
            place_of_performance_field_map, row, {"place_of_performance_flag": True})

        # Find the toptier awards from the subtier awards
        awarding_agency = Agency.get_by_subtier_only(row["awarding_sub_tier_agency_c"])
        funding_agency = Agency.get_by_subtier_only(row["funding_sub_tier_agency_co"])

        # Generate the unique Award ID:
        # "CONT_AW_" + agency_id + referenced_idv_agency_iden + piid + parent_award_id
        generated_unique_id = 'CONT_AW_' + \
            (row['agency_id'] if row['agency_id'] else '-NONE-') + '_' + \
            (row['referenced_idv_agency_iden'] if row['referenced_idv_agency_iden'] else '-NONE-') + '_' + \
            (row['piid'] if row['piid'] else '-NONE-') + '_' + \
            (row['parent_award_id'] if row['parent_award_id'] else '-NONE-')

        # Create the summary Award
        (created, award) = Award.get_or_create_summary_award(
            generated_unique_award_id=generated_unique_id, piid=row['piid'])
        award.parent_award_piid = row.get('parent_award_id')
        award.save()

        # Append row to list of Awards updated
        award_update_id_list.append(award.id)

        # Broker timestamps may arrive with or without fractional seconds.
        try:
            last_mod_date = datetime.strptime(
                str(row['last_modified']), "%Y-%m-%d %H:%M:%S.%f").date()
        except ValueError:
            last_mod_date = datetime.strptime(
                str(row['last_modified']), "%Y-%m-%d %H:%M:%S").date()

        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": funding_agency,
            "recipient": legal_entity,
            "place_of_performance": pop_location,
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
            "action_date": format_date(row['action_date']),
            "last_modified_date": last_mod_date,
            "transaction_unique_id": row['detached_award_proc_unique'],
            "generated_unique_award_id": generated_unique_id,
            "is_fpds": True
        }
        contract_field_map = {
            "type": "contract_award_type",
            "type_description": "contract_award_type_desc",
            "description": "award_description"
        }
        transaction_normalized_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=contract_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)
        contract_instance = load_data_into_model(
            TransactionFPDS(),  # thrown away
            row,
            as_dict=True)
        detached_award_proc_unique = contract_instance['detached_award_proc_unique']
        unique_fpds = TransactionFPDS.objects.filter(
            detached_award_proc_unique=detached_award_proc_unique)
        existing_fpds = unique_fpds.first()  # hoisted: was queried twice per row
        if existing_fpds:
            # FIX: use a timezone-aware UTC timestamp. The original stored the
            # naive datetime.utcnow(), inconsistent with the sibling loader
            # in this file which uses datetime.now(timezone.utc).
            transaction_normalized_dict["update_date"] = datetime.now(timezone.utc)
            transaction_normalized_dict["fiscal_year"] = fy(
                transaction_normalized_dict["action_date"])
            # update TransactionNormalized
            TransactionNormalized.objects.filter(id=existing_fpds.transaction.id).\
                update(**transaction_normalized_dict)
            # update TransactionFPDS
            unique_fpds.update(**contract_instance)
        else:
            # create TransactionNormalized
            transaction = TransactionNormalized(**transaction_normalized_dict)
            transaction.save()
            # create TransactionFPDS
            transaction_fpds = TransactionFPDS(transaction=transaction,
                                               **contract_instance)
            transaction_fpds.save()
def insert_new_fpds(self, to_insert, total_rows):
    """Insert or update FPDS transactions from broker rows.

    Per row: upper-cases string values, builds recipient and
    place-of-performance locations plus a LegalEntity, resolves agencies,
    creates/fetches the summary Award, then updates or creates the
    TransactionNormalized/TransactionFPDS pair keyed on
    detached_award_proc_unique.

    to_insert: iterable of broker row dicts.
    total_rows: unused in this body (kept in the signature).
    """
    # Location model field -> broker column name.
    place_of_performance_field_map = {
        "location_country_code": "place_of_perform_country_c",
        "country_name": "place_of_perf_country_desc",
        "state_code": "place_of_performance_state",
        "state_name": "place_of_perfor_state_desc",
        "city_name": "place_of_perform_city_name",
        "county_name": "place_of_perform_county_na",
        "county_code": "place_of_perform_county_co",
        "zip_4a": "place_of_performance_zip4a",
        "congressional_code": "place_of_performance_congr",
        "zip_last4": "place_of_perform_zip_last4",
        "zip5": "place_of_performance_zip5",
    }
    legal_entity_location_field_map = {
        "location_country_code": "legal_entity_country_code",
        "country_name": "legal_entity_country_name",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_descrip",
        "city_name": "legal_entity_city_name",
        "county_name": "legal_entity_county_name",
        "county_code": "legal_entity_county_code",
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "zip4": "legal_entity_zip4",
        "congressional_code": "legal_entity_congressional",
        "zip_last4": "legal_entity_zip_last4",
        "zip5": "legal_entity_zip5",
    }
    for index, row in enumerate(to_insert, 1):
        upper_case_dict_values(row)

        # Create new LegalEntityLocation and LegalEntity from the row data
        legal_entity_location = create_location(
            legal_entity_location_field_map, row, {"recipient_flag": True, "is_fpds": True}
        )
        recipient_name = row["awardee_or_recipient_legal"]
        legal_entity = LegalEntity.objects.create(
            recipient_unique_id=row["awardee_or_recipient_uniqu"],
            recipient_name=recipient_name if recipient_name is not None else "",
        )
        legal_entity_value_map = {
            "location": legal_entity_location,
            "business_categories": get_business_categories(row=row, data_type="fpds"),
            "is_fpds": True,
        }
        set_legal_entity_boolean_fields(row)
        legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True)

        # Create the place of performance location
        pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True})

        # Find the toptier awards from the subtier awards
        awarding_agency = Agency.get_by_subtier_only(row["awarding_sub_tier_agency_c"])
        funding_agency = Agency.get_by_subtier_only(row["funding_sub_tier_agency_co"])

        # Generate the unique Award ID
        # "CONT_AW_" + agency_id + referenced_idv_agency_iden + piid + parent_award_id
        generated_unique_id = (
            "CONT_AW_"
            + (row["agency_id"] if row["agency_id"] else "-NONE-")
            + "_"
            + (row["referenced_idv_agency_iden"] if row["referenced_idv_agency_iden"] else "-NONE-")
            + "_"
            + (row["piid"] if row["piid"] else "-NONE-")
            + "_"
            + (row["parent_award_id"] if row["parent_award_id"] else "-NONE-")
        )

        # Create the summary Award
        (created, award) = Award.get_or_create_summary_award(
            generated_unique_award_id=generated_unique_id, piid=row["piid"]
        )
        award.parent_award_piid = row.get("parent_award_id")
        award.save()

        # Append row to list of Awards updated
        AWARD_UPDATE_ID_LIST.append(award.id)

        # Pick a parse format based on the timestamp's length: 19 chars means
        # no fractional seconds; otherwise try the fractional-second form.
        if row["last_modified"] and len(str(row["last_modified"])) == len("YYYY-MM-DD HH:MM:SS"):  # 19 characters
            dt_fmt = "%Y-%m-%d %H:%M:%S"
        else:
            dt_fmt = "%Y-%m-%d %H:%M:%S.%f"  # try using this even if last_modified isn't a valid string
        try:
            last_mod_date = datetime.strptime(str(row["last_modified"]), dt_fmt).date()
        except ValueError:
            # handle odd-string formats and NULLs from the upstream FPDS-NG system
            info_message = "Invalid value '{}' does not match: '{}'".format(row["last_modified"], dt_fmt)
            logger.info(info_message)
            last_mod_date = None

        award_type, award_type_desc = award_types(row)
        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": funding_agency,
            "recipient": legal_entity,
            "place_of_performance": pop_location,
            "period_of_performance_start_date": format_date(row["period_of_performance_star"]),
            "period_of_performance_current_end_date": format_date(row["period_of_performance_curr"]),
            "action_date": format_date(row["action_date"]),
            "last_modified_date": last_mod_date,
            "transaction_unique_id": row["detached_award_proc_unique"],
            "generated_unique_award_id": generated_unique_id,
            "is_fpds": True,
            "type": award_type,
            "type_description": award_type_desc,
        }
        contract_field_map = {"description": "award_description"}
        transaction_normalized_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=contract_field_map,
            value_map=parent_txn_value_map,
            as_dict=True,
        )
        contract_instance = load_data_into_model(TransactionFPDS(), row, as_dict=True)  # thrown away
        detached_award_proc_unique = contract_instance["detached_award_proc_unique"]
        unique_fpds = TransactionFPDS.objects.filter(detached_award_proc_unique=detached_award_proc_unique)
        if unique_fpds.first():
            # Existing record: refresh timestamps and overwrite both rows.
            transaction_normalized_dict["update_date"] = datetime.now(timezone.utc)
            transaction_normalized_dict["fiscal_year"] = fy(transaction_normalized_dict["action_date"])
            # update TransactionNormalized
            TransactionNormalized.objects.filter(id=unique_fpds.first().transaction.id).update(
                **transaction_normalized_dict
            )
            # update TransactionFPDS
            unique_fpds.update(**contract_instance)
        else:
            # create TransactionNormalized
            transaction = TransactionNormalized(**transaction_normalized_dict)
            transaction.save()
            # create TransactionFPDS
            transaction_fpds = TransactionFPDS(transaction=transaction, **contract_instance)
            transaction_fpds.save()

        # Update legal entity to map back to transaction
        legal_entity.transaction_unique_id = detached_award_proc_unique
        legal_entity.save()
def update_transaction_contract(db_cursor, fiscal_year=None, page=1, limit=500000):
    """Pull a page of broker detached_award_procurement rows and upsert them.

    Builds locations, legal entities, awards, and
    TransactionNormalized/TransactionFPDS records per row, each inside its
    own atomic block. Duplicate TransactionFPDS saves are ignored.

    db_cursor: open cursor on the Broker database.
    fiscal_year: optional FY filter (Oct 1 of FY-1 through Sep 30 of FY);
        when None, no date filter is applied.
    page, limit: pagination over detached_award_procurement_id order.
    """
    query = "SELECT * FROM detached_award_procurement"
    arguments = []

    # FIX: the FY boundary strings were previously computed before this
    # guard, so the documented default fiscal_year=None raised
    # TypeError on `fiscal_year - 1`. They are now computed only when needed.
    if fiscal_year:
        fy_begin = '10/01/' + str(fiscal_year - 1)
        fy_end = '09/30/' + str(fiscal_year)
        if arguments:
            query += " AND"
        else:
            query += " WHERE"
        query += ' action_date::Date BETWEEN %s AND %s'
        arguments += [fy_begin]
        arguments += [fy_end]
    query += ' ORDER BY detached_award_procurement_id LIMIT %s OFFSET %s'
    arguments += [limit, (page - 1) * limit]

    # FIX: interpolate however many parameters are present for the log line.
    # The original hard-coded arguments[0..3] and raised IndexError whenever
    # the fiscal-year filter was absent (only 2 arguments).
    logger.info("Executing query on Broker DB => " + query % tuple(arguments))
    db_cursor.execute(query, arguments)

    logger.info("Running dictfetchall on db_cursor")
    procurement_data = dictfetchall(db_cursor)

    # Location model field -> broker column name (recipient address).
    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "location_country_code": "legal_entity_country_code",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "state_code": "legal_entity_state_code",
        "zip4": "legal_entity_zip4"
    }
    legal_entity_location_value_map = {
        "recipient_flag": True
    }
    place_of_performance_field_map = {
        # not sure place_of_performance_locat maps exactly to city name
        # "city_name": "place_of_performance_locat",
        # location id doesn't mean it's a city. Can't use this mapping
        "congressional_code": "place_of_performance_congr",
        "state_code": "place_of_performance_state",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"
    }
    place_of_performance_value_map = {
        "place_of_performance_flag": True
    }
    contract_field_map = {
        "type": "contract_award_type",
        "description": "award_description"
    }

    logger.info("Getting total rows")
    total_rows = len(procurement_data)
    logger.info("Processing " + str(total_rows) + " rows of procurement data")

    start_time = datetime.now()
    for index, row in enumerate(procurement_data, 1):
        # One atomic block per source row so a failure rolls back only that row.
        with db_transaction.atomic():
            # Progress log every 100 rows.
            if not (index % 100):
                logger.info('D1 File Load: Loading row {} of {} ({})'.format(
                    str(index), str(total_rows), datetime.now() - start_time))

            recipient_name = row['awardee_or_recipient_legal']
            if recipient_name is None:
                recipient_name = ""

            legal_entity_location, created = get_or_create_location(
                legal_entity_location_field_map, row,
                copy(legal_entity_location_value_map)
            )

            # Create the legal entity if it doesn't exist
            legal_entity, created = LegalEntity.objects.get_or_create(
                recipient_unique_id=row['awardee_or_recipient_uniqu'],
                recipient_name=recipient_name
            )
            if created:
                legal_entity_value_map = {
                    "location": legal_entity_location,
                }
                legal_entity = load_data_into_model(
                    legal_entity, row, value_map=legal_entity_value_map, save=True)

            # Create the place of performance location
            pop_location, created = get_or_create_location(
                place_of_performance_field_map, row,
                copy(place_of_performance_value_map))

            # If awarding toptier agency code (aka CGAC) is not supplied on the
            # record, use the sub tier code to look it up. This code assumes
            # that all incoming records will supply an awarding subtier agency code.
            if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1:
                awarding_subtier_agency_id = subtier_agency_map[row["awarding_sub_tier_agency_c"]]
                awarding_toptier_agency_id = subtier_to_agency_map[awarding_subtier_agency_id]['toptier_agency_id']
                awarding_cgac_code = toptier_agency_map[awarding_toptier_agency_id]
                row['awarding_agency_code'] = awarding_cgac_code

            # If funding toptier agency code (aka CGAC) is empty, try using the sub
            # tier funding code to look it up. Unlike the awarding agency, we can't
            # assume that the funding agency subtier code will always be present.
            if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1:
                funding_subtier_agency_id = subtier_agency_map.get(row["funding_sub_tier_agency_co"])
                if funding_subtier_agency_id is not None:
                    funding_toptier_agency_id = \
                        subtier_to_agency_map[funding_subtier_agency_id]['toptier_agency_id']
                    funding_cgac_code = toptier_agency_map[funding_toptier_agency_id]
                else:
                    funding_cgac_code = None
                row['funding_agency_code'] = funding_cgac_code

            # Find the award that this award transaction belongs to.
            # If it doesn't exist, create it.
            awarding_agency = Agency.get_by_toptier_subtier(
                row['awarding_agency_code'],
                row["awarding_sub_tier_agency_c"]
            )
            created, award = Award.get_or_create_summary_award(
                awarding_agency=awarding_agency,
                piid=row.get('piid'),
                fain=row.get('fain'),
                uri=row.get('uri'),
                parent_award_piid=row.get('parent_award_id'))
            award.save()

            award_update_id_list.append(award.id)
            award_contract_update_id_list.append(award.id)

            parent_txn_value_map = {
                "award": award,
                "awarding_agency": awarding_agency,
                "funding_agency": Agency.get_by_toptier_subtier(
                    row['funding_agency_code'],
                    row["funding_sub_tier_agency_co"]),
                "recipient": legal_entity,
                "place_of_performance": pop_location,
                "period_of_performance_start_date": format_date(row['period_of_performance_star']),
                "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
                "action_date": format_date(row['action_date']),
            }

            transaction_dict = load_data_into_model(
                TransactionNormalized(),  # thrown away
                row,
                field_map=contract_field_map,
                value_map=parent_txn_value_map,
                as_dict=True)

            transaction = TransactionNormalized.get_or_create_transaction(**transaction_dict)
            transaction.save()

            contract_instance = load_data_into_model(
                TransactionFPDS(),  # thrown away
                row,
                as_dict=True)

            transaction_contract = TransactionFPDS(transaction=transaction, **contract_instance)
            # catch exception and do nothing if we see
            # "django.db.utils.IntegrityError: duplicate key value violates unique constraint"
            try:
                transaction_contract.save()
            except IntegrityError:
                pass
def update_transaction_contract(db_cursor, fiscal_year=None, page=1, limit=500000):
    """Pull a page of broker detached_award_procurement rows and upsert them.

    db_cursor: open cursor on the Broker database.
    fiscal_year: optional FY filter; page/limit paginate the result set.
    """
    # NOTE(review): fy_begin/fy_end are computed unconditionally below, so
    # calling this with the default fiscal_year=None raises TypeError on
    # `fiscal_year - 1` before the guard is reached — confirm and fix upstream.
    # logger.info("Getting IDs for what's currently in the DB...")
    # current_ids = TransactionFPDS.objects
    #
    # if fiscal_year:
    #     current_ids = current_ids.filter(action_date__fy=fiscal_year)
    #
    # current_ids = current_ids.values_list('detached_award_procurement_id', flat=True)

    query = "SELECT * FROM detached_award_procurement"
    arguments = []

    fy_begin = '10/01/' + str(fiscal_year - 1)
    fy_end = '09/30/' + str(fiscal_year)

    if fiscal_year:
        if arguments:
            query += " AND"
        else:
            query += " WHERE"
        query += ' action_date::Date BETWEEN %s AND %s'
        arguments += [fy_begin]
        arguments += [fy_end]
    query += ' ORDER BY detached_award_procurement_id LIMIT %s OFFSET %s'
    arguments += [limit, (page - 1) * limit]

    # NOTE(review): this log line assumes exactly four query parameters (i.e.
    # the FY filter was applied); with no fiscal_year it would IndexError.
    logger.info("Executing query on Broker DB => " + query % (
        arguments[0], arguments[1], arguments[2], arguments[3]))
    db_cursor.execute(query, arguments)

    logger.info("Running dictfetchall on db_cursor")
    procurement_data = dictfetchall(db_cursor)

    # Location model field -> broker column name (recipient address).
    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "location_country_code": "legal_entity_country_code",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "state_code": "legal_entity_state_code",
        "zip4": "legal_entity_zip4"
    }
    legal_entity_location_value_map = {"recipient_flag": True}
    place_of_performance_field_map = {
        # not sure place_of_performance_locat maps exactly to city name
        # "city_name": "place_of_performance_locat",
        # location id doesn't mean it's a city. Can't use this mapping
        "congressional_code": "place_of_performance_congr",
        "state_code": "place_of_performance_state",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"
    }
    place_of_performance_value_map = {"place_of_performance_flag": True}
    contract_field_map = {
        "type": "contract_award_type",
        "description": "award_description"
    }

    logger.info("Getting total rows")
    # rows_loaded = len(current_ids)
    total_rows = len(procurement_data)  # - rows_loaded
    logger.info("Processing " + str(total_rows) + " rows of procurement data")

    # skip_count = 0
    start_time = datetime.now()
    for index, row in enumerate(procurement_data, 1):
        # One atomic block per source row.
        with db_transaction.atomic():
            # if TransactionFPDS.objects.values('detached_award_procurement_id').\
            #         filter(detached_award_procurement_id=str(row['detached_award_procurement_id'])).first():
            #     skip_count += 1
            #
            #     if not (skip_count % 100):
            #         logger.info('Skipped {} records so far'.format(str(skip_count)))
            if not (index % 100):
                logger.info(
                    'D1 File Load: Loading row {} of {} ({})'.format(
                        str(index), str(total_rows), datetime.now() - start_time))

            recipient_name = row['awardee_or_recipient_legal']
            if recipient_name is None:
                recipient_name = ""

            legal_entity_location, created = get_or_create_location(
                legal_entity_location_field_map, row,
                copy(legal_entity_location_value_map))

            # Create the legal entity if it doesn't exist
            legal_entity, created = LegalEntity.objects.get_or_create(
                recipient_unique_id=row['awardee_or_recipient_uniqu'],
                recipient_name=recipient_name)
            if created:
                legal_entity_value_map = {
                    "location": legal_entity_location,
                }
                legal_entity = load_data_into_model(
                    legal_entity, row, value_map=legal_entity_value_map, save=True)

            # Create the place of performance location
            pop_location, created = get_or_create_location(
                place_of_performance_field_map, row,
                copy(place_of_performance_value_map))

            # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record,
            # use the sub tier code to look it up. This code assumes that all incoming
            # records will supply an awarding subtier agency code
            if row['awarding_agency_code'] is None or len(
                    row['awarding_agency_code'].strip()) < 1:
                awarding_subtier_agency_id = subtier_agency_map[
                    row["awarding_sub_tier_agency_c"]]
                awarding_toptier_agency_id = subtier_to_agency_map[
                    awarding_subtier_agency_id]['toptier_agency_id']
                awarding_cgac_code = toptier_agency_map[
                    awarding_toptier_agency_id]
                row['awarding_agency_code'] = awarding_cgac_code

            # If funding toptier agency code (aka CGAC) is empty, try using the sub
            # tier funding code to look it up. Unlike the awarding agency, we can't
            # assume that the funding agency subtier code will always be present.
            if row['funding_agency_code'] is None or len(
                    row['funding_agency_code'].strip()) < 1:
                funding_subtier_agency_id = subtier_agency_map.get(
                    row["funding_sub_tier_agency_co"])
                if funding_subtier_agency_id is not None:
                    funding_toptier_agency_id = \
                        subtier_to_agency_map[funding_subtier_agency_id]['toptier_agency_id']
                    funding_cgac_code = toptier_agency_map[
                        funding_toptier_agency_id]
                else:
                    funding_cgac_code = None
                row['funding_agency_code'] = funding_cgac_code

            # Find the award that this award transaction belongs to.
            # If it doesn't exist, create it.
            awarding_agency = Agency.get_by_toptier_subtier(
                row['awarding_agency_code'], row["awarding_sub_tier_agency_c"])
            created, award = Award.get_or_create_summary_award(
                awarding_agency=awarding_agency,
                piid=row.get('piid'),
                fain=row.get('fain'),
                uri=row.get('uri'),
                parent_award_piid=row.get('parent_award_id'))
            award.save()

            award_update_id_list.append(award.id)
            award_contract_update_id_list.append(award.id)

            parent_txn_value_map = {
                "award": award,
                "awarding_agency": awarding_agency,
                "funding_agency": Agency.get_by_toptier_subtier(
                    row['funding_agency_code'], row["funding_sub_tier_agency_co"]),
                "recipient": legal_entity,
                "place_of_performance": pop_location,
                "period_of_performance_start_date": format_date(row['period_of_performance_star']),
                "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
                "action_date": format_date(row['action_date']),
            }

            transaction_dict = load_data_into_model(
                TransactionNormalized(),  # thrown away
                row,
                field_map=contract_field_map,
                value_map=parent_txn_value_map,
                as_dict=True)

            transaction = TransactionNormalized.get_or_create_transaction(
                **transaction_dict)
            transaction.save()

            contract_instance = load_data_into_model(
                TransactionFPDS(),  # thrown away
                row,
                as_dict=True)

            transaction_contract = TransactionFPDS(transaction=transaction, **contract_instance)
            # catch exception and do nothing if we see
            # "django.db.utils.IntegrityError: duplicate key value violates unique constraint"
            try:
                transaction_contract.save()
            except IntegrityError:
                pass
def insert_new_fpds(self, to_insert, total_rows):
    """Insert or update FPDS transactions from broker rows.

    Per row: upper-cases string values, builds recipient and
    place-of-performance locations plus a LegalEntity, resolves agencies,
    creates/fetches the summary Award, then updates or creates the
    TransactionNormalized/TransactionFPDS pair keyed on
    detached_award_proc_unique.

    to_insert: iterable of broker row dicts.
    total_rows: unused in this body (kept in the signature).
    """
    # Location model field -> broker column name.
    place_of_performance_field_map = {
        "location_country_code": "place_of_perform_country_c",
        "country_name": "place_of_perf_country_desc",
        "state_code": "place_of_performance_state",
        "state_name": "place_of_perfor_state_desc",
        "city_name": "place_of_perform_city_name",
        "county_name": "place_of_perform_county_na",
        "county_code": "place_of_perform_county_co",
        "zip_4a": "place_of_performance_zip4a",
        "congressional_code": "place_of_performance_congr",
        "zip_last4": "place_of_perform_zip_last4",
        "zip5": "place_of_performance_zip5",
    }
    legal_entity_location_field_map = {
        "location_country_code": "legal_entity_country_code",
        "country_name": "legal_entity_country_name",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_descrip",
        "city_name": "legal_entity_city_name",
        "county_name": "legal_entity_county_name",
        "county_code": "legal_entity_county_code",
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "zip4": "legal_entity_zip4",
        "congressional_code": "legal_entity_congressional",
        "zip_last4": "legal_entity_zip_last4",
        "zip5": "legal_entity_zip5",
    }
    for index, row in enumerate(to_insert, 1):
        upper_case_dict_values(row)

        # Create new LegalEntityLocation and LegalEntity from the row data
        legal_entity_location = create_location(
            legal_entity_location_field_map, row, {
                "recipient_flag": True,
                "is_fpds": True
            })
        recipient_name = row["awardee_or_recipient_legal"]
        legal_entity = LegalEntity.objects.create(
            recipient_unique_id=row["awardee_or_recipient_uniqu"],
            recipient_name=recipient_name if recipient_name is not None else "",
        )
        legal_entity_value_map = {
            "location": legal_entity_location,
            "business_categories": get_business_categories(row=row, data_type="fpds"),
            "is_fpds": True,
        }
        set_legal_entity_boolean_fields(row)
        legal_entity = load_data_into_model(
            legal_entity, row, value_map=legal_entity_value_map, save=True)

        # Create the place of performance location
        pop_location = create_location(
            place_of_performance_field_map, row, {"place_of_performance_flag": True})

        # Find the toptier awards from the subtier awards
        awarding_agency = Agency.get_by_subtier_only(
            row["awarding_sub_tier_agency_c"])
        funding_agency = Agency.get_by_subtier_only(
            row["funding_sub_tier_agency_co"])

        # Generate the unique Award ID
        # "CONT_AW_" + agency_id + referenced_idv_agency_iden + piid + parent_award_id
        generated_unique_id = (
            "CONT_AW_" +
            (row["agency_id"] if row["agency_id"] else "-NONE-") + "_" +
            (row["referenced_idv_agency_iden"] if row["referenced_idv_agency_iden"] else "-NONE-") + "_" +
            (row["piid"] if row["piid"] else "-NONE-") + "_" +
            (row["parent_award_id"] if row["parent_award_id"] else "-NONE-"))

        # Create the summary Award
        (created, award) = Award.get_or_create_summary_award(
            generated_unique_award_id=generated_unique_id, piid=row["piid"])
        award.parent_award_piid = row.get("parent_award_id")
        award.save()

        # Append row to list of Awards updated
        AWARD_UPDATE_ID_LIST.append(award.id)

        # Parse last_modified with fractional seconds first, then without.
        # NOTE(review): a NULL or otherwise malformed last_modified will raise
        # ValueError from the fallback parse (unlike the variant of this
        # loader that logs and substitutes None) — confirm inputs upstream.
        try:
            last_mod_date = datetime.strptime(str(
                row["last_modified"]), "%Y-%m-%d %H:%M:%S.%f").date()
        except ValueError:
            last_mod_date = datetime.strptime(str(row["last_modified"]),
                                              "%Y-%m-%d %H:%M:%S").date()

        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": funding_agency,
            "recipient": legal_entity,
            "place_of_performance": pop_location,
            "period_of_performance_start_date": format_date(row["period_of_performance_star"]),
            "period_of_performance_current_end_date": format_date(row["period_of_performance_curr"]),
            "action_date": format_date(row["action_date"]),
            "last_modified_date": last_mod_date,
            "transaction_unique_id": row["detached_award_proc_unique"],
            "generated_unique_award_id": generated_unique_id,
            "is_fpds": True,
        }
        contract_field_map = {
            "type": "contract_award_type",
            "type_description": "contract_award_type_desc",
            "description": "award_description",
        }
        transaction_normalized_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=contract_field_map,
            value_map=parent_txn_value_map,
            as_dict=True,
        )
        contract_instance = load_data_into_model(
            TransactionFPDS(), row, as_dict=True)  # thrown away
        detached_award_proc_unique = contract_instance[
            "detached_award_proc_unique"]
        unique_fpds = TransactionFPDS.objects.filter(
            detached_award_proc_unique=detached_award_proc_unique)
        if unique_fpds.first():
            # Existing record: refresh timestamps and overwrite both rows.
            transaction_normalized_dict["update_date"] = datetime.now(
                timezone.utc)
            transaction_normalized_dict["fiscal_year"] = fy(
                transaction_normalized_dict["action_date"])
            # update TransactionNormalized
            TransactionNormalized.objects.filter(
                id=unique_fpds.first().transaction.id).update(
                    **transaction_normalized_dict)
            # update TransactionFPDS
            unique_fpds.update(**contract_instance)
        else:
            # create TransactionNormalized
            transaction = TransactionNormalized(
                **transaction_normalized_dict)
            transaction.save()
            # create TransactionFPDS
            transaction_fpds = TransactionFPDS(transaction=transaction,
                                               **contract_instance)
            transaction_fpds.save()

        # Update legal entity to map back to transaction
        legal_entity.transaction_unique_id = detached_award_proc_unique
        legal_entity.save()
def load_file_d1(submission_attributes, procurement_data, db_cursor, quick=False):
    """
    Process and load file D1 broker data (contract award txns).

    Row by row, this builds the recipient Location + LegalEntity, the place
    of performance Location, the summary Award, and finally a
    TransactionNormalized/TransactionFPDS pair. When ``quick`` is truthy the
    whole load is delegated to a SQL script and the Python loop is skipped.
    """
    # Broker column -> Location field translations for the recipient address.
    recipient_location_fields = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "location_country_code": "legal_entity_country_code",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "state_code": "legal_entity_state_code",
        "zip4": "legal_entity_zip4"
    }

    # Broker column -> Location field translations for place of performance.
    pop_location_fields = {
        # not sure place_of_performance_locat maps exactly to city name
        "city_name": "place_of_performance_locat",
        "congressional_code": "place_of_performance_congr",
        "state_code": "place_of_performance_state",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"
    }

    pop_value_map = {"place_of_performance_flag": True}
    recipient_location_value_map = {"recipient_flag": True}

    # Shared field map for both the normalized transaction and the FPDS row.
    contract_field_map = {
        "type": "contract_award_type",
        "description": "award_description"
    }

    load_started_at = time.time()

    if quick:
        # Fast path: hand the whole submission off to a SQL script.
        sql_params = {'broker_submission_id': submission_attributes.broker_submission_id}
        run_sql_file('usaspending_api/etl/management/load_file_d1.sql', sql_params)
        logger.info('\n\n\n\nFile D1 time elapsed: {}'.format(time.time() - load_started_at))
        return

    row_count = len(procurement_data)
    loop_started_at = datetime.now()

    for row_num, row in enumerate(procurement_data, 1):
        if row_num % 100 == 0:
            logger.info('D1 File Load: Loading row {} of {} ({})'.format(
                str(row_num), str(row_count), datetime.now() - loop_started_at))

        recipient_location, _ = get_or_create_location(
            recipient_location_fields, row, copy(recipient_location_value_map))

        recipient_name = row['awardee_or_recipient_legal']
        if recipient_name is None:
            recipient_name = ""

        # Create the legal entity if it doesn't exist
        legal_entity, entity_created = LegalEntity.objects.get_or_create(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name)

        if entity_created:
            legal_entity = load_data_into_model(
                legal_entity, row,
                value_map={"location": recipient_location},
                save=True)

        # Create the place of performance location
        pop_location, _ = get_or_create_location(
            pop_location_fields, row, copy(pop_value_map))

        # If awarding toptier agency code (aka CGAC) is not supplied on the D1
        # record, use the sub tier code to look it up. This code assumes that
        # all incoming records will supply an awarding subtier agency code.
        if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1:
            row['awarding_agency_code'] = Agency.get_by_subtier(
                row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code

        # If funding toptier agency code (aka CGAC) is empty, try using the sub
        # tier funding code to look it up. Unlike the awarding agency, we can't
        # assume that the funding agency subtier code will always be present.
        if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1:
            funding_agency = Agency.get_by_subtier(row["funding_sub_tier_agency_co"])
            row['funding_agency_code'] = (
                funding_agency.toptier_agency.cgac_code
                if funding_agency is not None
                else None)

        # Find the award that this award transaction belongs to.
        # If it doesn't exist, create it.
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'], row["awarding_sub_tier_agency_c"])
        _, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'),
            parent_award_piid=row.get('parent_award_id'))  # It is a FAIN/PIID/URI, not our db's pk
        award.save()

        award_update_id_list.append(award.id)
        award_contract_update_id_list.append(award.id)

        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": Agency.get_by_toptier_subtier(
                row['funding_agency_code'], row["funding_sub_tier_agency_co"]),
            "recipient": legal_entity,
            "place_of_performance": pop_location,
            'submission': submission_attributes,
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
            "action_date": format_date(row['action_date']),
        }

        transaction_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=contract_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        transaction = TransactionNormalized.get_or_create_transaction(**transaction_dict)
        transaction.save()

        contract_value_map = {
            'submission': submission_attributes,
            'reporting_period_start': submission_attributes.reporting_period_start,
            'reporting_period_end': submission_attributes.reporting_period_end,
            "period_of_performance_potential_end_date": format_date(row['period_of_perf_potential_e'])
        }

        contract_instance = load_data_into_model(
            TransactionFPDS(),  # thrown away
            row,
            field_map=contract_field_map,
            value_map=contract_value_map,
            as_dict=True)

        transaction_contract = TransactionFPDS(transaction=transaction, **contract_instance)
        transaction_contract.save()

    logger.info('\n\n\n\nFile D1 time elapsed: {}'.format(time.time() - load_started_at))
def insert_new_fpds(self, to_insert, total_rows):
    """
    Upsert FPDS (contract) transactions from pre-fetched broker rows.

    For each row in ``to_insert`` this:
      1. builds the recipient Location + LegalEntity,
      2. builds the place-of-performance Location,
      3. finds/creates the summary Award,
      4. parses the broker ``last_modified`` timestamp,
      5. updates the existing TransactionNormalized/TransactionFPDS pair
         (matched on ``detached_award_proc_unique``) or creates a new pair.

    Args:
        to_insert: iterable of broker row dicts (detached_award_procurement).
        total_rows: accepted for interface compatibility; not used in this
            method (no progress logging here).
    """
    # Broker column -> Location field translations for place of performance.
    place_of_performance_field_map = {
        "location_country_code": "place_of_perform_country_c",
        "country_name": "place_of_perf_country_desc",
        "state_code": "place_of_performance_state",
        "state_name": "place_of_perfor_state_desc",
        "city_name": "place_of_perform_city_name",
        "county_name": "place_of_perform_county_na",
        "county_code": "place_of_perform_county_co",
        "zip_4a": "place_of_performance_zip4a",
        "congressional_code": "place_of_performance_congr",
        "zip_last4": "place_of_perform_zip_last4",
        "zip5": "place_of_performance_zip5",
    }

    # Broker column -> Location field translations for the recipient address.
    legal_entity_location_field_map = {
        "location_country_code": "legal_entity_country_code",
        "country_name": "legal_entity_country_name",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_descrip",
        "city_name": "legal_entity_city_name",
        "county_name": "legal_entity_county_name",
        "county_code": "legal_entity_county_code",
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "zip4": "legal_entity_zip4",
        "congressional_code": "legal_entity_congressional",
        "zip_last4": "legal_entity_zip_last4",
        "zip5": "legal_entity_zip5",
    }

    fpds_normalized_field_map = {
        "type": "contract_award_type",
        "description": "award_description"
    }

    # Highly-compensated-officer columns for the TransactionFPDS row.
    fpds_field_map = {
        "officer_1_name": "high_comp_officer1_full_na",
        "officer_1_amount": "high_comp_officer1_amount",
        "officer_2_name": "high_comp_officer2_full_na",
        "officer_2_amount": "high_comp_officer2_amount",
        "officer_3_name": "high_comp_officer3_full_na",
        "officer_3_amount": "high_comp_officer3_amount",
        "officer_4_name": "high_comp_officer4_full_na",
        "officer_4_amount": "high_comp_officer4_amount",
        "officer_5_name": "high_comp_officer5_full_na",
        "officer_5_amount": "high_comp_officer5_amount"
    }

    # NOTE: the previous version enumerated rows but never used the index.
    for row in to_insert:
        upper_case_dict_values(row)

        # Create new LegalEntityLocation and LegalEntity from the row data
        legal_entity_location = create_location(
            legal_entity_location_field_map, row, {"recipient_flag": True, "is_fpds": True}
        )
        recipient_name = row["awardee_or_recipient_legal"]
        legal_entity = LegalEntity.objects.create(
            recipient_unique_id=row["awardee_or_recipient_uniqu"],
            recipient_name=recipient_name if recipient_name is not None else "",
        )
        legal_entity_value_map = {
            "location": legal_entity_location,
            "business_categories": get_business_categories(row=row, data_type="fpds"),
            "is_fpds": True,
        }
        set_legal_entity_boolean_fields(row)
        legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True)

        # Create the place of performance location
        pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True})

        # Find the toptier awards from the subtier awards
        awarding_agency = Agency.get_by_subtier_only(row["awarding_sub_tier_agency_c"])
        funding_agency = Agency.get_by_subtier_only(row["funding_sub_tier_agency_co"])

        # Create the summary Award
        _, award = Award.get_or_create_summary_award(
            generated_unique_award_id=row["unique_award_key"],
            piid=row["piid"],
        )
        award.parent_award_piid = row.get("parent_award_id")
        award.save()

        # Append row to list of Awards updated
        AWARD_UPDATE_ID_LIST.append(award.id)

        # Broker timestamps come in two shapes; "YYYY-MM-DD HH:MM:SS" is
        # exactly 19 characters, otherwise assume fractional seconds.
        if row["last_modified"] and len(str(row["last_modified"])) == len("YYYY-MM-DD HH:MM:SS"):
            dt_fmt = "%Y-%m-%d %H:%M:%S"
        else:
            dt_fmt = "%Y-%m-%d %H:%M:%S.%f"  # try this even if last_modified isn't a valid string

        try:
            last_mod_date = datetime.strptime(str(row["last_modified"]), dt_fmt).date()
        except ValueError:
            # Handle odd-string formats and NULLs from the upstream FPDS-NG system
            logger.info("Invalid value '{}' does not match: '{}'".format(row["last_modified"], dt_fmt))
            last_mod_date = None

        award_type, award_type_desc = award_types(row)

        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": funding_agency,
            "recipient": legal_entity,
            "place_of_performance": pop_location,
            "period_of_performance_start_date": format_date(row["period_of_performance_star"]),
            "period_of_performance_current_end_date": format_date(row["period_of_performance_curr"]),
            "action_date": format_date(row["action_date"]),
            "last_modified_date": last_mod_date,
            "transaction_unique_id": row["detached_award_proc_unique"],
            "is_fpds": True,
            "type": award_type,
            "type_description": award_type_desc,
        }

        transaction_normalized_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=fpds_normalized_field_map,
            value_map=parent_txn_value_map,
            as_dict=True,
        )

        contract_instance = load_data_into_model(
            TransactionFPDS(),  # thrown away
            row,
            field_map=fpds_field_map,
            as_dict=True
        )

        detached_award_proc_unique = contract_instance["detached_award_proc_unique"]
        unique_fpds = TransactionFPDS.objects.filter(detached_award_proc_unique=detached_award_proc_unique)
        # Hoisted: the previous version called unique_fpds.first() twice,
        # issuing a duplicate database query for every existing row.
        existing_fpds = unique_fpds.first()

        if existing_fpds:
            transaction_normalized_dict["update_date"] = datetime.now(timezone.utc)
            transaction_normalized_dict["fiscal_year"] = fy(transaction_normalized_dict["action_date"])

            # update TransactionNormalized
            TransactionNormalized.objects.filter(id=existing_fpds.transaction.id).update(
                **transaction_normalized_dict
            )

            # update TransactionFPDS
            unique_fpds.update(**contract_instance)
        else:
            # create TransactionNormalized
            transaction = TransactionNormalized(**transaction_normalized_dict)
            transaction.save()

            # create TransactionFPDS
            transaction_fpds = TransactionFPDS(transaction=transaction, **contract_instance)
            transaction_fpds.save()

        # Update legal entity to map back to transaction
        legal_entity.transaction_unique_id = detached_award_proc_unique
        legal_entity.save()
def update_transaction_contract(db_cursor, fiscal_year=None, page=1, limit=500000):
    """
    Fetch a page of detached_award_procurement rows from the Broker DB and
    load them as TransactionNormalized/TransactionFPDS records, creating
    summary Awards as needed.

    Args:
        db_cursor: open cursor on the Broker database.
        fiscal_year: if given, restrict rows to that federal fiscal year
            (Oct 1 of the prior calendar year through Sep 30).
        page: 1-based page number used to compute the query OFFSET.
        limit: maximum number of rows to fetch per call.
    """
    query = "SELECT * FROM detached_award_procurement"
    arguments = []

    if fiscal_year:
        # Compute the FY bounds only when a fiscal year was supplied; the
        # previous version computed them unconditionally, which raised
        # TypeError ("10/01/" + str(None - 1)) when fiscal_year was omitted.
        fy_begin = "10/01/" + str(fiscal_year - 1)
        fy_end = "09/30/" + str(fiscal_year)
        query += " AND" if arguments else " WHERE"
        query += " action_date::Date BETWEEN %s AND %s"
        arguments += [fy_begin, fy_end]

    query += " ORDER BY detached_award_procurement_id LIMIT %s OFFSET %s"
    arguments += [limit, (page - 1) * limit]

    # Interpolate however many parameters the query actually has (2 or 4);
    # the previous version hard-coded arguments[0..3] and would have crashed
    # whenever no fiscal-year filter was applied.
    logger.info("Executing query on Broker DB => " + query % tuple(arguments))
    db_cursor.execute(query, arguments)

    logger.info("Running dictfetchall on db_cursor")
    procurement_data = dictfetchall(db_cursor)

    fpds_normalized_field_map = {
        "type": "contract_award_type",
        "description": "award_description"
    }

    # Highly-compensated-officer columns for the TransactionFPDS row.
    fpds_field_map = {
        "officer_1_name": "high_comp_officer1_full_na",
        "officer_1_amount": "high_comp_officer1_amount",
        "officer_2_name": "high_comp_officer2_full_na",
        "officer_2_amount": "high_comp_officer2_amount",
        "officer_3_name": "high_comp_officer3_full_na",
        "officer_3_amount": "high_comp_officer3_amount",
        "officer_4_name": "high_comp_officer4_full_na",
        "officer_4_amount": "high_comp_officer4_amount",
        "officer_5_name": "high_comp_officer5_full_na",
        "officer_5_amount": "high_comp_officer5_amount",
    }

    logger.info("Getting total rows")
    total_rows = len(procurement_data)

    logger.info("Processing " + str(total_rows) + " rows of procurement data")

    start_time = datetime.now()
    for index, row in enumerate(procurement_data, 1):
        # One DB transaction per broker row so a failure rolls back that row only.
        with db_transaction.atomic():
            if not (index % 100):
                logger.info("D1 File Load: Loading row {} of {} ({})".format(
                    str(index), str(total_rows), datetime.now() - start_time))

            # If awarding toptier agency code (aka CGAC) is not supplied on the
            # record, use the sub tier code to look it up. This code assumes
            # that all incoming records supply an awarding subtier agency code.
            if row["awarding_agency_code"] is None or len(row["awarding_agency_code"].strip()) < 1:
                awarding_subtier_agency_id = subtier_agency_map[row["awarding_sub_tier_agency_c"]]
                awarding_toptier_agency_id = subtier_to_agency_map[awarding_subtier_agency_id]["toptier_agency_id"]
                row["awarding_agency_code"] = toptier_agency_map[awarding_toptier_agency_id]

            # If funding toptier agency code (aka CGAC) is empty, try using the
            # sub tier funding code to look it up. Unlike the awarding agency,
            # we can't assume the funding subtier code will always be present.
            if row["funding_agency_code"] is None or len(row["funding_agency_code"].strip()) < 1:
                funding_subtier_agency_id = subtier_agency_map.get(row["funding_sub_tier_agency_co"])
                if funding_subtier_agency_id is not None:
                    funding_toptier_agency_id = subtier_to_agency_map[funding_subtier_agency_id]["toptier_agency_id"]
                    funding_toptier_code = toptier_agency_map[funding_toptier_agency_id]
                else:
                    funding_toptier_code = None
                row["funding_agency_code"] = funding_toptier_code

            # Find the award that this award transaction belongs to.
            # If it doesn't exist, create it.
            awarding_agency = Agency.get_by_toptier_subtier(
                row["awarding_agency_code"], row["awarding_sub_tier_agency_c"])
            _, award = Award.get_or_create_summary_award(
                awarding_agency=awarding_agency,
                piid=row.get("piid"),
                fain=row.get("fain"),
                uri=row.get("uri"),
                parent_award_piid=row.get("parent_award_id"),
                generated_unique_award_id=row.get("unique_award_key"),
            )
            award.save()

            award_update_id_list.append(award.id)
            award_contract_update_id_list.append(award.id)

            parent_txn_value_map = {
                "award": award,
                "awarding_agency": awarding_agency,
                "funding_agency": Agency.get_by_toptier_subtier(
                    row["funding_agency_code"], row["funding_sub_tier_agency_co"]),
                "period_of_performance_start_date": format_date(row["period_of_performance_star"]),
                "period_of_performance_current_end_date": format_date(row["period_of_performance_curr"]),
                "action_date": format_date(row["action_date"]),
            }

            transaction_dict = load_data_into_model(
                TransactionNormalized(),  # thrown away
                row,
                field_map=fpds_normalized_field_map,
                value_map=parent_txn_value_map,
                as_dict=True,
            )

            transaction = TransactionNormalized.get_or_create_transaction(**transaction_dict)
            transaction.save()

            contract_instance = load_data_into_model(
                TransactionFPDS(),  # thrown away
                row,
                field_map=fpds_field_map,
                as_dict=True
            )

            transaction_contract = TransactionFPDS(transaction=transaction, **contract_instance)
            # Deliberate best-effort: swallow duplicate-key violations
            # ("django.db.utils.IntegrityError: duplicate key value violates
            # unique constraint") so re-runs skip already-loaded rows.
            try:
                transaction_contract.save()
            except IntegrityError:
                pass
def load_file_d1(submission_attributes, procurement_data, db_cursor, quick=False):
    """
    Process and load file D1 broker data (contract award txns).

    NOTE(review): this file defines ``load_file_d1`` more than once; at import
    time this later definition is the one that wins.

    Each procurement row produces: the recipient LegalEntity (with Location),
    the place-of-performance Location, a summary Award, and a
    TransactionNormalized/TransactionFPDS pair. A truthy ``quick`` flag
    bypasses the Python loop and runs a SQL script instead.
    """
    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "location_country_code": "legal_entity_country_code",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "state_code": "legal_entity_state_code",
        "zip4": "legal_entity_zip4"
    }

    place_of_performance_field_map = {
        # not sure place_of_performance_locat maps exactly to city name
        "city_name": "place_of_performance_locat",
        "congressional_code": "place_of_performance_congr",
        "state_code": "place_of_performance_state",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"
    }

    place_of_performance_value_map = {"place_of_performance_flag": True}
    legal_entity_location_value_map = {"recipient_flag": True}

    # One field map serves both the normalized txn and the FPDS row.
    contract_field_map = {
        "type": "contract_award_type",
        "description": "award_description"
    }

    d_start_time = time.time()

    if quick:
        # SQL fast path — no per-row Python processing.
        run_sql_file('usaspending_api/etl/management/load_file_d1.sql',
                     {'broker_submission_id': submission_attributes.broker_submission_id})
        logger.info('\n\n\n\nFile D1 time elapsed: {}'.format(time.time() - d_start_time))
        return

    total_rows = len(procurement_data)
    start_time = datetime.now()

    for index, row in enumerate(procurement_data, 1):
        if index % 100 == 0:
            elapsed = datetime.now() - start_time
            logger.info('D1 File Load: Loading row {} of {} ({})'.format(
                str(index), str(total_rows), elapsed))

        legal_entity_location, _ = get_or_create_location(
            legal_entity_location_field_map, row, copy(legal_entity_location_value_map))

        raw_name = row['awardee_or_recipient_legal']
        recipient_name = "" if raw_name is None else raw_name

        # Create the legal entity if it doesn't exist
        legal_entity, was_created = LegalEntity.objects.get_or_create(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name)

        if was_created:
            legal_entity = load_data_into_model(
                legal_entity, row,
                value_map={"location": legal_entity_location},
                save=True)

        # Create the place of performance location
        pop_location, _ = get_or_create_location(
            place_of_performance_field_map, row, copy(place_of_performance_value_map))

        # If awarding toptier agency code (aka CGAC) is not supplied on the D1
        # record, use the sub tier code to look it up. This code assumes that
        # all incoming records will supply an awarding subtier agency code.
        if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1:
            row['awarding_agency_code'] = Agency.get_by_subtier(
                row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code

        # If funding toptier agency code (aka CGAC) is empty, try using the sub
        # tier funding code to look it up. Unlike the awarding agency, we can't
        # assume that the funding agency subtier code will always be present.
        if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1:
            funding_agency = Agency.get_by_subtier(row["funding_sub_tier_agency_co"])
            if funding_agency is not None:
                row['funding_agency_code'] = funding_agency.toptier_agency.cgac_code
            else:
                row['funding_agency_code'] = None

        # Find the award that this award transaction belongs to.
        # If it doesn't exist, create it.
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'], row["awarding_sub_tier_agency_c"])
        _, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'),
            parent_award_piid=row.get('parent_award_id'))  # It is a FAIN/PIID/URI, not our db's pk
        award.save()

        award_update_id_list.append(award.id)
        award_contract_update_id_list.append(award.id)

        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": Agency.get_by_toptier_subtier(
                row['funding_agency_code'], row["funding_sub_tier_agency_co"]),
            "recipient": legal_entity,
            "place_of_performance": pop_location,
            'submission': submission_attributes,
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
            "action_date": format_date(row['action_date']),
        }

        transaction_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=contract_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        transaction = TransactionNormalized.get_or_create_transaction(**transaction_dict)
        transaction.save()

        contract_value_map = {
            'submission': submission_attributes,
            'reporting_period_start': submission_attributes.reporting_period_start,
            'reporting_period_end': submission_attributes.reporting_period_end,
            "period_of_performance_potential_end_date": format_date(row['period_of_perf_potential_e'])
        }

        contract_instance = load_data_into_model(
            TransactionFPDS(),  # thrown away
            row,
            field_map=contract_field_map,
            value_map=contract_value_map,
            as_dict=True)

        transaction_contract = TransactionFPDS(transaction=transaction, **contract_instance)
        transaction_contract.save()

    logger.info('\n\n\n\nFile D1 time elapsed: {}'.format(time.time() - d_start_time))