def load_legal_entity(self, fabs_broker_data, total_rows): start_time = datetime.now() for index, row in enumerate(fabs_broker_data, 1): if not (index % 10000): logger.info('Legal Entity: Loading row {} of {} ({})'.format( str(index), str(total_rows), datetime.now() - start_time)) recipient_name = row['awardee_or_recipient_legal'] if recipient_name is None: recipient_name = '' recipient_unique_id = row['awardee_or_recipient_uniqu'] if recipient_unique_id is None: recipient_unique_id = '' lookup_key = (recipient_unique_id, recipient_name) legal_entity = self.le_map.get(lookup_key) if not legal_entity: legal_entity = LegalEntity( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name) legal_entity = load_data_into_model( legal_entity, row, value_map={"location": lel_bulk[index - 1]}, save=False) LegalEntity.update_business_type_categories(legal_entity) self.le_map[lookup_key] = legal_entity legal_entity_bulk.append(legal_entity) legal_entity_lookup.append(legal_entity) logger.info('Bulk creating Legal Entities (batch_size: {})...'.format( BATCH_SIZE)) LegalEntity.objects.bulk_create(legal_entity_bulk, batch_size=BATCH_SIZE)
def load_legal_entity(self, fpds_broker_data, total_rows): start_time = datetime.now() for index, row in enumerate(fpds_broker_data, 1): if not (index % 10000): logger.info('Legal Entity: Loading row {} of {} ({})'.format(str(index), str(total_rows), datetime.now() - start_time)) recipient_name = row['awardee_or_recipient_legal'] if recipient_name is None: recipient_name = '' recipient_unique_id = row['awardee_or_recipient_uniqu'] if recipient_unique_id is None: recipient_unique_id = '' lookup_key = (recipient_unique_id, recipient_name) legal_entity = self.le_map.get(lookup_key) if not legal_entity: legal_entity = LegalEntity( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name ) legal_entity = load_data_into_model( legal_entity, row, value_map={"location": lel_bulk[index - 1]}, save=False) LegalEntity.update_business_type_categories(legal_entity) self.le_map[lookup_key] = legal_entity legal_entity_bulk.append(legal_entity) legal_entity_lookup.append(legal_entity) logger.info('Bulk creating Legal Entities (batch_size: {})...'.format(BATCH_SIZE)) LegalEntity.objects.bulk_create(legal_entity_bulk, batch_size=BATCH_SIZE)
def test_get_or_create_by_duns(): # Null duns should always return True for created assert LegalEntity.get_or_create_by_duns( duns=None)[1] # Assert created is True assert LegalEntity.get_or_create_by_duns( duns=None)[1] # Assert created is True assert LegalEntity.get_or_create_by_duns( duns=None)[1] # Assert created is True assert LegalEntity.get_or_create_by_duns( duns="")[1] # Assert created is True assert LegalEntity.get_or_create_by_duns( duns="")[1] # Assert created is True assert LegalEntity.get_or_create_by_duns( duns="")[1] # Assert created is True # Let's create a new record with a DUNS entity, created = LegalEntity.get_or_create_by_duns(duns="123456789") assert created assert LegalEntity.get_or_create_by_duns(duns="123456789")[0] == entity assert not LegalEntity.get_or_create_by_duns(duns="123456789")[1]
def get_or_create_recipient(self, row): recipient_dict = { "location_id": h.get_or_create_location( row, mapper=location_mapper_fin_assistance_recipient).location_id, "recipient_name": row['recipient_name'], } recipient_type = row.get("recipient_type", ":").split(":")[1].strip() recipient_dict.update(self.recipient_flags_by_type(recipient_type)) le, created = LegalEntity.get_or_create_by_duns(duns=row['duns_no']) if created: # Update from our recipient dictionary for attr, value in recipient_dict.items(): setattr(le, attr, value) le.save() return le
def load_subawards(submission_attributes, db_cursor): """ Loads File F from the broker. db_cursor should be the db_cursor for Broker """ # A list of award id's to update the subaward accounts and totals on award_ids_to_update = set() # Get a list of PIIDs from this submission awards_for_sub = Award.objects.filter(transaction__submission=submission_attributes).distinct() piids = list(awards_for_sub.values_list("piid", flat=True)) fains = list(awards_for_sub.values_list("fain", flat=True)) # This allows us to handle an empty list in the SQL without changing the query piids.append(None) fains.append(None) # D1 File F db_cursor.execute(D1_FILE_F_QUERY, [submission_attributes.broker_submission_id, tuple(piids)]) d1_f_data = dictfetchall(db_cursor) logger.info("Creating D1 F File Entries (Subcontracts): {}".format(len(d1_f_data))) d1_create_count = 0 d1_update_count = 0 d1_empty_count = 0 for row in d1_f_data: if row['subcontract_num'] is None: if row['id'] is not None and row['subcontract_amount'] is not None: logger.warn("Subcontract of broker id {} has amount, but no number".format(row["id"])) logger.warn("Failing row: {}".format(row)) else: d1_empty_count += 1 continue # Get the agency agency = get_valid_awarding_agency(row) if not agency: logger.warn("Subaward number {} cannot find matching agency with toptier code {} and subtier code {}".format(row['subcontract_num'], row['awarding_agency_code'], row['awarding_sub_tier_agency_c'])) continue # Find the award to attach this sub-contract to # We perform this lookup by finding the Award containing a transaction with # a matching parent award id, piid, and submission attributes award = Award.objects.filter(awarding_agency=agency, transaction__submission=submission_attributes, transaction__contract_data__piid=row['piid'], transaction__contract_data__isnull=False, transaction__contract_data__parent_award_id=row['parent_award_id']).distinct().order_by("-date_signed").first() # We don't have a matching award for this subcontract, log a warning and continue to the next row if not award: logger.warn("Subcontract number {} cannot find matching award with piid {}, parent_award_id {}; skipping...".format(row['subcontract_num'], row['piid'], row['parent_award_id'])) continue award_ids_to_update.add(award.id) # Find the recipient by looking up by duns recipient, created = LegalEntity.get_or_create_by_duns(duns=row['duns']) if created: recipient.parent_recipient_unique_id = row['parent_duns'] recipient.recipient_name = row['company_name'] recipient.location = get_or_create_location(row, location_d1_recipient_mapper) recipient.save() # Get or create POP place_of_performance = get_or_create_location(row, pop_mapper) d1_f_dict = { 'award': award, 'recipient': recipient, 'submission': submission_attributes, 'data_source': "DBR", 'cfda': None, 'awarding_agency': award.awarding_agency, 'funding_agency': award.funding_agency, 'place_of_performance': place_of_performance, 'subaward_number': row['subcontract_num'], 'amount': row['subcontract_amount'], 'description': row['overall_description'], 'recovery_model_question1': row['recovery_model_q1'], 'recovery_model_question2': row['recovery_model_q2'], 'action_date': row['subcontract_date'], 'award_report_fy_month': row['report_period_mon'], 'award_report_fy_year': row['report_period_year'], 'naics': row['naics'], 'naics_description': row['naics_description'], } # Create the subaward subaward, created = Subaward.objects.update_or_create(subaward_number=row['subcontract_num'], award=award, defaults=d1_f_dict) if created: d1_create_count += 1 else: d1_update_count += 1 # D2 File F db_cursor.execute(D2_FILE_F_QUERY, [submission_attributes.broker_submission_id, tuple(fains)]) d2_f_data = dictfetchall(db_cursor) logger.info("Creating D2 F File Entries (Subawards): {}".format(len(d2_f_data))) d2_create_count = 0 d2_update_count = 0 d2_empty_count = 0 for row in d2_f_data: if row['subaward_num'] is None: if row['id'] is not None and row['subaward_amount'] is not None: logger.warn("Subcontract of broker id {} has amount, but no number".format(row["id"])) logger.warn("Failing row: {}".format(row)) else: d2_empty_count += 1 continue agency = get_valid_awarding_agency(row) if not agency: logger.warn("Subaward number {} cannot find matching agency with toptier code {} and subtier code {}".format(row['subaward_num'], row['awarding_agency_code'], row['awarding_sub_tier_agency_c'])) continue # Find the award to attach this sub-award to # We perform this lookup by finding the Award containing a transaction with # a matching fain and submission. If this fails, try submission and uri if row['fain'] and len(row['fain']) > 0: award = Award.objects.filter(awarding_agency=agency, transaction__submission=submission_attributes, transaction__assistance_data__isnull=False, transaction__assistance_data__fain=row['fain']).distinct().order_by("-date_signed").first() # Couldn't find a match on FAIN, try URI if it exists if not award and row['uri'] and len(row['uri']) > 0: award = Award.objects.filter(awarding_agency=agency, transaction__submission=submission_attributes, transaction__assistance_data__isnull=False, transaction__assistance_data__uri=row['uri']).distinct().first() # We don't have a matching award for this subcontract, log a warning and continue to the next row if not award: logger.warn("Subaward number {} cannot find matching award with fain {}, uri {}; skipping...".format(row['subaward_num'], row['fain'], row['uri'])) continue award_ids_to_update.add(award.id) # Find the recipient by looking up by duns recipient, created = LegalEntity.get_or_create_by_duns(duns=row['duns']) if created: recipient_name = row['awardee_name'] if recipient_name is None: recipient_name = row['awardee_or_recipient_legal'] if recipient_name is None: recipient_name = "" recipient.recipient_name = recipient_name recipient.parent_recipient_unique_id = row['parent_duns'] recipient.location = get_or_create_location(row, location_d2_recipient_mapper) recipient.save() # Get or create POP place_of_performance = get_or_create_location(row, pop_mapper) # Get CFDA Program cfda = Cfda.objects.filter(program_number=row['cfda_number']).first() d2_f_dict = { 'award': award, 'recipient': recipient, 'submission': submission_attributes, 'data_source': "DBR", 'cfda': cfda, 'awarding_agency': award.awarding_agency, 'funding_agency': award.funding_agency, 'place_of_performance': place_of_performance, 'subaward_number': row['subaward_num'], 'amount': row['subaward_amount'], 'description': row['project_description'], 'recovery_model_question1': row['compensation_q1'], 'recovery_model_question2': row['compensation_q2'], 'action_date': row['subaward_date'], 'award_report_fy_month': row['report_period_mon'], 'award_report_fy_year': row['report_period_year'], 'naics': None, 'naics_description': None, } # Create the subaward subaward, created = Subaward.objects.update_or_create(subaward_number=row['subaward_num'], award=award, defaults=d2_f_dict) if created: d2_create_count += 1 else: d2_update_count += 1 # Update Award objects with subaward aggregates update_award_subawards(tuple(award_ids_to_update)) logger.info( """Submission {} Subcontracts created: {} Subcontracts updated: {} Empty subcontract rows: {} Subawards created: {} Subawards updated: {} Empty subaward rows: {}""".format(submission_attributes.broker_submission_id, d1_create_count, d1_update_count, d1_empty_count, d2_create_count, d2_update_count, d2_empty_count))
def load_file_d2(submission_attributes, award_financial_assistance_data, db_cursor): """ Process and load file D2 broker data (financial assistance award txns). """ legal_entity_location_field_map = { "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "city_code": "legal_entity_city_code", "city_name": "legal_entity_city_name", "congressional_code": "legal_entity_congressional", "county_code": "legal_entity_county_code", "county_name": "legal_entity_county_name", "foreign_city_name": "legal_entity_foreign_city", "foreign_postal_code": "legal_entity_foreign_posta", "foreign_province": "legal_entity_foreign_provi", "state_code": "legal_entity_state_code", "state_name": "legal_entity_state_name", "zip5": "legal_entity_zip5", "zip_last4": "legal_entity_zip_last4", "location_country_code": "legal_entity_country_code" } place_of_performance_field_map = { "city_name": "place_of_performance_city", "performance_code": "place_of_performance_code", "congressional_code": "place_of_performance_congr", "county_name": "place_of_perform_county_na", "foreign_location_description": "place_of_performance_forei", "state_name": "place_of_perform_state_nam", "zip4": "place_of_performance_zip4a", "location_country_code": "place_of_perform_country_c" } legal_entity_location_value_map = {"recipient_flag": True} place_of_performance_value_map = {"place_of_performance_flag": True} fad_field_map = { "type": "assistance_type", "description": "award_description", } for row in award_financial_assistance_data: legal_entity_location, created = get_or_create_location( legal_entity_location_field_map, row, legal_entity_location_value_map) # Create the legal entity if it doesn't exist try: legal_entity = LegalEntity.objects.get( recipient_unique_id=row['awardee_or_recipient_uniqu']) except ObjectDoesNotExist: legal_entity_value_map = { "location": legal_entity_location, "legal_entity_id": row['awardee_or_recipient_uniqu'] } legal_entity = load_data_into_model( LegalEntity(), row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location, created = get_or_create_location( place_of_performance_field_map, row, place_of_performance_value_map) # If toptier agency code (aka CGAC) is not supplied on the D2 record, # use the sub tier code to look it up if row['awarding_agency_code'] is None: row['awarding_agency_code'] = Agency.get_by_subtier( row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code if row['funding_agency_code'] is None: row['funding_agency_code'] = Agency.get_by_subtier( row["funding_sub_tier_agency_co"]).toptier_agency.cgac_code # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"]) created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, piid=row.get('piid'), fain=row.get('fain'), uri=row.get('uri'), parent_award_id=row.get('parent_award_id')) award.save() AWARD_UPDATE_ID_LIST.append(award.id) parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'], row["funding_sub_tier_agency_co"]), "recipient": legal_entity, "place_of_performance": pop_location, 'submission': submission_attributes, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), } transaction_instance = load_data_into_model( Transaction(), row, field_map=fad_field_map, value_map=parent_txn_value_map, as_dict=True) transaction_instance, created = Transaction.objects.get_or_create( **transaction_instance) fad_value_map = { "transaction": transaction_instance, "submission": submission_attributes, "cfda": CFDAProgram.objects.filter( program_number=row['cfda_number']).first(), 'reporting_period_start': submission_attributes.reporting_period_start, 'reporting_period_end': submission_attributes.reporting_period_end, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), } financial_assistance_data = load_data_into_model( TransactionAssistance(), row, field_map=fad_field_map, value_map=fad_value_map, save=True)
def load_file_d1(submission_attributes, procurement_data, db_cursor): """ Process and load file D1 broker data (contract award txns). """ legal_entity_location_field_map = { "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "location_country_code": "legal_entity_country_code", "city_name": "legal_entity_city_name", "congressional_code": "legal_entity_congressional", "state_code": "legal_entity_state_code", "zip4": "legal_entity_zip4" } place_of_performance_field_map = { # not sure place_of_performance_locat maps exactly to city name "city_name": "place_of_performance_locat", "congressional_code": "place_of_performance_congr", "state_code": "place_of_performance_state", "zip4": "place_of_performance_zip4a", "location_country_code": "place_of_perform_country_c" } place_of_performance_value_map = {"place_of_performance_flag": True} legal_entity_location_value_map = {"recipient_flag": True} contract_field_map = { "type": "contract_award_type", "description": "award_description" } for row in procurement_data: legal_entity_location, created = get_or_create_location( legal_entity_location_field_map, row, legal_entity_location_value_map) # Create the legal entity if it doesn't exist try: legal_entity = LegalEntity.objects.get( recipient_unique_id=row['awardee_or_recipient_uniqu']) except ObjectDoesNotExist: legal_entity_value_map = { "location": legal_entity_location, "legal_entity_id": row['awardee_or_recipient_uniqu'], } legal_entity = load_data_into_model( LegalEntity(), row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location, created = get_or_create_location( place_of_performance_field_map, row, place_of_performance_value_map) # If awarding/funding toptier agency code (aka CGAC) is not supplied on the D1 record, # use the sub tier code to look it up if row['awarding_agency_code'] is None: row['awarding_agency_code'] = Agency.get_by_subtier( row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code if row['funding_agency_code'] is None: row['funding_agency_code'] = Agency.get_by_subtier( row["funding_sub_tier_agency_co"]).toptier_agency.cgac_code # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"]) created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, piid=row.get('piid'), fain=row.get('fain'), uri=row.get('uri'), parent_award_id=row.get('parent_award_id')) award.save() AWARD_UPDATE_ID_LIST.append(award.id) AWARD_CONTRACT_UPDATE_ID_LIST.append(award.id) parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'], row["funding_sub_tier_agency_co"]), "recipient": legal_entity, "place_of_performance": pop_location, 'submission': submission_attributes, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), } transaction_instance = load_data_into_model( Transaction(), row, field_map=contract_field_map, value_map=parent_txn_value_map, as_dict=True) transaction_instance, created = Transaction.objects.get_or_create( **transaction_instance) contract_value_map = { 'transaction': transaction_instance, 'submission': submission_attributes, 'reporting_period_start': submission_attributes.reporting_period_start, 'reporting_period_end': submission_attributes.reporting_period_end, "period_of_performance_potential_end_date": format_date(row['period_of_perf_potential_e']) } contract_instance = load_data_into_model(TransactionContract(), row, field_map=contract_field_map, value_map=contract_value_map, save=True)
def update_transaction_assistance(db_cursor, fiscal_year=None, page=1, limit=500000): # logger.info("Getting IDs for what's currently in the DB...") # current_ids = TransactionFABS.objects # # if fiscal_year: # current_ids = current_ids.filter(action_date__fy=fiscal_year) # # current_ids = current_ids.values_list('published_award_financial_assistance_id', flat=True) query = "SELECT * FROM published_award_financial_assistance" arguments = [] fy_begin = '10/01/' + str(fiscal_year - 1) fy_end = '09/30/' + str(fiscal_year) if fiscal_year: if arguments: query += " AND" else: query += " WHERE" query += ' action_date::Date BETWEEN %s AND %s' arguments += [fy_begin] arguments += [fy_end] query += ' ORDER BY published_award_financial_assistance_id LIMIT %s OFFSET %s' arguments += [limit, (page - 1) * limit] logger.info("Executing query on Broker DB => " + query % (arguments[0], arguments[1], arguments[2], arguments[3])) db_cursor.execute(query, arguments) logger.info("Running dictfetchall on db_cursor") award_financial_assistance_data = dictfetchall(db_cursor) legal_entity_location_field_map = { "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "city_name": "legal_entity_city_name", "congressional_code": "legal_entity_congressional", "county_code": "legal_entity_county_code", "county_name": "legal_entity_county_name", "foreign_city_name": "legal_entity_foreign_city", "foreign_postal_code": "legal_entity_foreign_posta", "foreign_province": "legal_entity_foreign_provi", "state_code": "legal_entity_state_code", "state_name": "legal_entity_state_name", "zip5": "legal_entity_zip5", "zip_last4": "legal_entity_zip_last4", "location_country_code": "legal_entity_country_code" } place_of_performance_field_map = { "city_name": "place_of_performance_city", "performance_code": "place_of_performance_code", "congressional_code": "place_of_performance_congr", "county_name": "place_of_perform_county_na", "foreign_location_description": "place_of_performance_forei", "state_name": "place_of_perform_state_nam", "zip4": "place_of_performance_zip4a", "location_country_code": "place_of_perform_country_c" } fad_field_map = { "type": "assistance_type", "description": "award_description", } logger.info("Getting total rows") # rows_loaded = len(current_ids) total_rows = len(award_financial_assistance_data) # - rows_loaded logger.info("Processing " + str(total_rows) + " rows of assistance data") # skip_count = 0 # ROW ITERATION STARTS HERE lel_bulk = [] pop_bulk = [] legal_entity_bulk = [] award_bulk = [] transaction_assistance_bulk = [] transaction_normalized_bulk = [] logger.info( 'Getting legal entity location objects for {} rows...'.format( len(award_financial_assistance_data))) for index, row in enumerate(award_financial_assistance_data, 1): # Recipient flag is true for LeL legal_entity_location = get_or_create_location( legal_entity_location_field_map, row, {"recipient_flag": True}, save=False) lel_bulk.append(legal_entity_location) logger.info('Bulk creating {} legal entity location rows...'.format( len(lel_bulk))) try: Location.objects.bulk_create(lel_bulk) except IntegrityError: logger.info('!!! DUPLICATES FOUND. Continuing... ') logger.info( 'Getting place of performance objects for {} rows...'.format( len(award_financial_assistance_data))) for index, row in enumerate(award_financial_assistance_data, 1): # Place of Performance flag is true for PoP pop_location = get_or_create_location( place_of_performance_field_map, row, {"place_of_performance_flag": True}, save=False) pop_bulk.append(pop_location) logger.info('Bulk creating {} place of performance rows...'.format( len(pop_bulk))) try: Location.objects.bulk_create(pop_bulk) except IntegrityError: logger.info('!!! DUPLICATES FOUND. Continuing... ') logger.info('Getting legal entity objects for {} rows...'.format( len(award_financial_assistance_data))) for index, row in enumerate(award_financial_assistance_data, 1): recipient_name = row.get('awardee_or_recipient_legal', '') legal_entity = LegalEntity.objects.filter( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name).first() if legal_entity is None: legal_entity = LegalEntity( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name) legal_entity_value_map = { "location": lel_bulk[index - 1], } legal_entity = load_data_into_model( legal_entity, row, value_map=legal_entity_value_map, save=False) legal_entity_bulk.append(legal_entity) logger.info('Bulk creating {} legal entity rows...'.format( len(legal_entity_bulk))) try: LegalEntity.objects.bulk_create(legal_entity_bulk) except IntegrityError: logger.info('!!! DUPLICATES FOUND. Continuing... ') awarding_agency_list = [] funding_agency_list = [] logger.info('Getting award objects for {} rows...'.format( len(award_financial_assistance_data))) for index, row in enumerate(award_financial_assistance_data, 1): # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record, # use the sub tier code to look it up. This code assumes that all incoming # records will supply an awarding subtier agency code if row['awarding_agency_code'] is None or len( row['awarding_agency_code'].strip()) < 1: awarding_subtier_agency_id = subtier_agency_map[ row["awarding_sub_tier_agency_c"]] awarding_toptier_agency_id = subtier_to_agency_map[ awarding_subtier_agency_id]['toptier_agency_id'] awarding_cgac_code = toptier_agency_map[ awarding_toptier_agency_id] row['awarding_agency_code'] = awarding_cgac_code # If funding toptier agency code (aka CGAC) is empty, try using the sub # tier funding code to look it up. Unlike the awarding agency, we can't # assume that the funding agency subtier code will always be present. if row['funding_agency_code'] is None or len( row['funding_agency_code'].strip()) < 1: funding_subtier_agency_id = subtier_agency_map.get( row["funding_sub_tier_agency_co"]) if funding_subtier_agency_id is not None: funding_toptier_agency_id = \ subtier_to_agency_map[funding_subtier_agency_id]['toptier_agency_id'] funding_cgac_code = toptier_agency_map[ funding_toptier_agency_id] else: funding_cgac_code = None row['funding_agency_code'] = funding_cgac_code # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"]) funding_agency = Agency.get_by_toptier_subtier( row['funding_agency_code'], row["funding_sub_tier_agency_co"]) awarding_agency_list.append(awarding_agency) funding_agency_list.append(funding_agency) # award.save() is called in Award.get_or_create_summary_award by default created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, fain=row.get('fain'), uri=row.get('uri'), save=False) award_bulk.append(award) award_update_id_list.append(award.id) logger.info('Bulk creating {} award rows...'.format(len(award_bulk))) try: Award.objects.bulk_create(award_bulk) except IntegrityError: logger.info('!!! DUPLICATES FOUND. Continuing... ') logger.info('Getting transaction_normalized for {} rows...'.format( len(award_financial_assistance_data))) for index, row in enumerate(award_financial_assistance_data, 1): parent_txn_value_map = { "award": award_bulk[index - 1], "awarding_agency": awarding_agency_list[index - 1], "funding_agency": funding_agency_list[index - 1], "recipient": legal_entity_bulk[index - 1], "place_of_performance": pop_bulk[index - 1], "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), } transaction_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=fad_field_map, value_map=parent_txn_value_map, as_dict=True) transaction_normalized = TransactionNormalized.get_or_create_transaction( **transaction_dict) transaction_normalized.fiscal_year = fy( transaction_normalized.action_date) transaction_normalized_bulk.append(transaction_normalized) logger.info('Bulk creating {} TransactionNormalized rows...'.format( len(transaction_normalized_bulk))) try: TransactionNormalized.objects.bulk_create( transaction_normalized_bulk) except IntegrityError: logger.info( 'Tried and failed to insert duplicate transaction_normalized row. Continuing... ' ) for index, row in enumerate(award_financial_assistance_data, 1): financial_assistance_data = load_data_into_model( TransactionFABS(), # thrown away row, as_dict=True) transaction_assistance = TransactionFABS( transaction=transaction_normalized_bulk[index - 1], **financial_assistance_data) transaction_assistance_bulk.append(transaction_assistance) logger.info('Bulk creating TransactionFABS rows...') try: TransactionFABS.objects.bulk_create(transaction_assistance_bulk) except IntegrityError: logger.info('!!! DUPLICATES FOUND. Continuing... ')
def forwards_func(apps, schema_editor): legalentities = apps.get_model("references", "LegalEntity") for le in legalentities.objects.all(): LegalEntity.update_business_type_categories(le) le.save()
def get_or_create_recipient(self, row): recipient_dict = { "location_id": h.get_or_create_location(row, mapper=location_mapper_vendor).location_id, "recipient_name": row['vendorname'], "vendor_phone_number": row['phoneno'], "vendor_fax_number": row['faxno'], "parent_recipient_unique_id": row['parentdunsnumber'], "city_local_government": self.parse_first_character(row['iscitylocalgovernment']), "county_local_government": self.parse_first_character(row['iscountylocalgovernment']), "inter_municipal_local_government": self.parse_first_character(row['isintermunicipallocalgovernment']), "local_government_owned": self.parse_first_character(row['islocalgovernmentowned']), "municipality_local_government": self.parse_first_character(row['ismunicipalitylocalgovernment']), "school_district_local_government": self.parse_first_character(row['isschooldistrictlocalgovernment']), "township_local_government": self.parse_first_character(row['istownshiplocalgovernment']), "federal_agency": self.parse_first_character(row['isfederalgovernmentagency']), "us_federal_government": self.parse_first_character(row['federalgovernmentflag']), "federally_funded_research_and_development_corp": self.parse_first_character(row['isfederallyfundedresearchanddevelopmentcorp']), "us_tribal_government": self.parse_first_character(row['istriballyownedfirm']), "c8a_program_participant": self.parse_first_character(row['firm8aflag']), "foreign_government": self.parse_first_character(row['isforeigngovernment']), "community_developed_corporation_owned_firm": self.parse_first_character(row['iscommunitydevelopedcorporationownedfirm']), "labor_surplus_area_firm": self.parse_first_character(row['islaborsurplusareafirm']), "small_agricultural_cooperative": self.parse_first_character(row['issmallagriculturalcooperative']), "international_organization": self.parse_first_character(row['isinternationalorganization']), "c1862_land_grant_college": self.parse_first_character(row['is1862landgrantcollege']), "c1890_land_grant_college": self.parse_first_character(row['is1890landgrantcollege']), "c1994_land_grant_college": self.parse_first_character(row['is1994landgrantcollege']), "minority_institution": self.parse_first_character(row['minorityinstitutionflag']), "private_university_or_college": self.parse_first_character(row['isprivateuniversityorcollege']), "school_of_forestry": self.parse_first_character(row['isschoolofforestry']), "state_controlled_institution_of_higher_learning": self.parse_first_character(row['isstatecontrolledinstitutionofhigherlearning']), "tribal_college": self.parse_first_character(row['istribalcollege']), "veterinary_college": self.parse_first_character(row['isveterinarycollege']), "educational_institution": self.parse_first_character(row['educationalinstitutionflag']), "alaskan_native_servicing_institution": self.parse_first_character(row['isalaskannativeownedcorporationorfirm']), "community_development_corporation": self.parse_first_character(row['iscommunitydevelopmentcorporation']), "native_hawaiian_servicing_institution": self.parse_first_character(row['isnativehawaiianownedorganizationorfirm']), "domestic_shelter": self.parse_first_character(row['isdomesticshelter']), "manufacturer_of_goods": self.parse_first_character(row['ismanufacturerofgoods']), "hospital_flag": self.parse_first_character(row['hospitalflag']), "veterinary_hospital": self.parse_first_character(row['isveterinaryhospital']), "hispanic_servicing_institution": self.parse_first_character(row['ishispanicservicinginstitution']), "woman_owned_business": self.parse_first_character(row['womenownedflag']), "minority_owned_business": self.parse_first_character(row['minorityownedbusinessflag']), "women_owned_small_business": self.parse_first_character(row['iswomenownedsmallbusiness']), "economically_disadvantaged_women_owned_small_business": self.parse_first_character(row['isecondisadvwomenownedsmallbusiness']), "joint_venture_economic_disadvantaged_women_owned_small_bus": self.parse_first_character(row['isjointventureecondisadvwomenownedsmallbusiness']), "veteran_owned_business": self.parse_first_character(row['veteranownedflag']), "airport_authority": self.parse_first_character(row['isairportauthority']), "housing_authorities_public_tribal": self.parse_first_character(row['ishousingauthoritiespublicortribal']), "interstate_entity": self.parse_first_character(row['isinterstateentity']), "planning_commission": self.parse_first_character(row['isplanningcommission']), "port_authority": self.parse_first_character(row['isportauthority']), "transit_authority": self.parse_first_character(row['istransitauthority']), "foreign_owned_and_located": self.parse_first_character(row['isforeignownedandlocated']), "american_indian_owned_business": self.parse_first_character(row['aiobflag']), "alaskan_native_owned_corporation_or_firm": self.parse_first_character(row['isalaskannativeownedcorporationorfirm']), "indian_tribe_federally_recognized": self.parse_first_character(row['isindiantribe']), "native_hawaiian_owned_business": self.parse_first_character(row['isnativehawaiianownedorganizationorfirm']), "tribally_owned_business": self.parse_first_character(row['istriballyownedfirm']), "asian_pacific_american_owned_business": self.parse_first_character(row['apaobflag']), "black_american_owned_business": self.parse_first_character(row['baobflag']), "hispanic_american_owned_business": self.parse_first_character(row['haobflag']), "native_american_owned_business": self.parse_first_character(row['naobflag']), "historically_black_college": self.parse_first_character(row['hbcuflag']), "subcontinent_asian_asian_indian_american_owned_business": self.parse_first_character(row['saaobflag']), "us_local_government": self.parse_first_character(row['islocalgovernmentowned']), "division_name": self.parse_first_character(row['divisionname']), "division_number": self.parse_first_character(row['divisionnumberorofficecode']), "historically_underutilized_business_zone": self.parse_first_character(row['hubzoneflag']), "corporate_entity_tax_exempt": self.parse_first_character(row['iscorporateentitytaxexempt']), "corporate_entity_not_tax_exempt": self.parse_first_character(row['iscorporateentitynottaxexempt']), "council_of_governments": self.parse_first_character(row['iscouncilofgovernments']), "dot_certified_disadvantage": self.parse_first_character(row['isdotcertifieddisadvantagedbusinessenterprise']), "for_profit_organization": self.parse_first_character(row['isforprofitorganization']), "foundation": self.parse_first_character(row['isfoundation']), "joint_venture_women_owned_small_business": self.parse_first_character(row['isjointventurewomenownedsmallbusiness']), "limited_liability_corporation": self.parse_first_character(row['islimitedliabilitycorporation']), "other_not_for_profit_organization": self.parse_first_character(row['isothernotforprofitorganization']), "other_minority_owned_business": self.parse_first_character(row['isotherminorityowned']), "partnership_or_limited_liability_partnership": self.parse_first_character(row['ispartnershiporlimitedliabilitypartnership']), "sole_proprietorship": self.parse_first_character(row['issoleproprietorship']), "subchapter_scorporation": self.parse_first_character(row['issubchapterscorporation']), "nonprofit_organization": self.parse_first_character(row['nonprofitorganizationflag']), } le, created = LegalEntity.get_or_create_by_duns(duns=row['dunsnumber']) if created: # Update from our recipient dictionary for attr, value in recipient_dict.items(): setattr(le, attr, value) le.save()