def test_get_or_create_location_creates_new_locations():
    """If no location is found, we create a new one"""
    ref = mommy.make(
        'references.RefCountryCode', country_code='USA', _fill_optional=True)
    row = {
        'vendorcountrycode': 'USA',
        'zipcode': '12345-6789',
        'streetaddress': 'Addy1',
        'streetaddress2': 'Addy2',
        'streetaddress3': None,
        'vendor_state_code': 'ST',
        'city': 'My Town',
    }
    # this canonicalization step runs during load_submission, also
    row = canonicalize_location_dict(row)

    # can't find it because we're looking at the US fields
    assert Location.objects.count() == 0
    helpers.get_or_create_location(
        row, load_usaspending_contracts.location_mapper_vendor)
    assert Location.objects.count() == 1

    loc = Location.objects.all().first()
    # country resolves to the reference row created above
    assert loc.location_country_code == ref
    # zip is split into its 5-digit and last-4 components
    assert loc.zip5 == '12345'
    assert loc.zip_last4 == '6789'
    # address/city text is upper-cased by canonicalization
    assert loc.address_line1 == 'ADDY1'
    assert loc.address_line2 == 'ADDY2'
    assert loc.address_line3 is None
    assert loc.state_code == 'ST'
    assert loc.city_name == 'MY TOWN'
def test_get_or_create_fa_recipient_location_creates_new_locations():
    """If no location is found, we create a new one

    For financial assistance recipient locations."""
    ref = mommy.make(
        'references.RefCountryCode', country_code='USA', _fill_optional=True)
    row = {
        'recipient_country_code': 'USA',
        'recipient_zip': '12345-6789',
        'recipient_state_code': 'OH',
        'recipient_county_name': 'MONTGOMERY',
    }

    # can't find it because we're looking at the US fields
    assert Location.objects.count() == 0
    helpers.get_or_create_location(
        row,
        load_usaspending_assistance.location_mapper_fin_assistance_recipient)
    assert Location.objects.count() == 1

    loc = Location.objects.all().first()
    # the created location carries the mapped recipient fields
    assert loc.location_country_code == ref
    assert loc.zip5 == '12345'
    assert loc.zip_last4 == '6789'
    assert loc.state_code == 'OH'
    assert loc.county_name == 'MONTGOMERY'
def test_get_or_create_fa_place_of_performance_location_creates_new_locations():
    """If no location is found, we create a new one

    For financial assistance place of performance locations."""
    row = {
        'principal_place_country_code': 'USA',
        'principal_place_zip': '12345-6789',
        'principal_place_state_code': 'OH',
        'principal_place_cc': 'MONTGOMERY',
    }

    # can't find it because we're looking at the US fields
    assert Location.objects.count() == 0
    mapper = (load_usaspending_assistance
              .location_mapper_fin_assistance_principal_place)
    helpers.get_or_create_location(row, mapper)
    assert Location.objects.count() == 1

    loc = Location.objects.all().first()
    # the created location carries the mapped place-of-performance fields
    assert loc.location_country_code == 'USA'
    assert loc.zip5 == '12345'
    assert loc.zip_last4 == '6789'
    assert loc.state_code == 'OH'
    assert loc.county_name == 'MONTGOMERY'
def test_get_or_create_location_creates_new_locations():
    """If no location is found, we create a new one"""
    row = {
        'vendorcountrycode': 'USA',
        'zipcode': '12345-6789',
        'streetaddress': 'Addy1',
        'streetaddress2': 'Addy2',
        'streetaddress3': None,
        'vendor_state_code': 'ST',
        'city': 'My Town',
    }
    # this canonicalization step runs during load_submission, also
    row = canonicalize_location_dict(row)

    # can't find it because we're looking at the US fields
    assert Location.objects.count() == 0
    helpers.get_or_create_location(
        row, load_usaspending_contracts.location_mapper_vendor)
    assert Location.objects.count() == 1

    loc = Location.objects.all().first()
    # here the country code is stored as the plain string
    assert loc.location_country_code == 'USA'
    # zip is split into its 5-digit and last-4 components
    assert loc.zip5 == '12345'
    assert loc.zip_last4 == '6789'
    # address/city text is upper-cased by canonicalization
    assert loc.address_line1 == 'ADDY1'
    assert loc.address_line2 == 'ADDY2'
    assert loc.address_line3 is None
    assert loc.state_code == 'ST'
    assert loc.city_name == 'MY TOWN'
def test_get_or_create_fa_place_of_performance_location_creates_new_locations():
    """If no location is found, we create a new one

    For financial assistance place of performance locations."""
    row = {
        'principal_place_country_code': 'USA',
        'principal_place_zip': '12345-6789',
        'principal_place_state_code': 'OH',
        'principal_place_cc': 'MONTGOMERY',
    }

    # can't find it because we're looking at the US fields
    assert Location.objects.count() == 0
    pop_mapper = (load_usaspending_assistance
                  .location_mapper_fin_assistance_principal_place)
    helpers.get_or_create_location(row, pop_mapper)
    assert Location.objects.count() == 1

    loc = Location.objects.all().first()
    # the new location carries the mapped place-of-performance fields
    assert loc.location_country_code == 'USA'
    assert loc.zip5 == '12345'
    assert loc.zip_last4 == '6789'
    assert loc.state_code == 'OH'
    assert loc.county_name == 'MONTGOMERY'
def test_get_or_create_location_non_usa():
    """We should query different fields if it's a non-US row"""
    ref = mommy.make(
        'references.RefCountryCode', country_code='UAE', _fill_optional=True)
    expected = mommy.make(
        'references.Location',
        location_country_code=ref,
        zip5='12345',
        zip_last4='6789',
        # @todo: city_name has a different length than foreign_city_name, so
        # we can't use the random value
        city_name='AAAAAAAA',
        _fill_optional=True)
    row = {
        'vendorcountrycode': 'UAE',
        'zipcode': '12345-6789',
        'streetaddress': expected.address_line1,
        'streetaddress2': expected.address_line2,
        'streetaddress3': expected.address_line3,
        'state': expected.state_code,
        'city': expected.city_name,
    }

    # can't find it because we're looking at the POP fields
    result = helpers.get_or_create_location(
        row, load_usaspending_contracts.location_mapper_place_of_performance)
    assert result != expected
def test_get_or_create_location_non_usa():
    """We should query different fields if it's a non-US row"""
    ref = mommy.make(
        'references.RefCountryCode', country_code='UAE', _fill_optional=True)
    expected = mommy.make(
        'references.Location',
        location_country_code=ref,
        zip5='12345',
        zip_last4='6789',
        # @todo: can't use _fill_optional on this model because data
        # will exceed allowable index length
        address_line1='line one of address',
        address_line2='line two of address',
        address_line3='line thre of address',
        state_code='GG',
        city_name='AAAAAAAA')
    row = {
        'vendorcountrycode': 'UAE',
        'zipcode': '12345-6789',
        'streetaddress': expected.address_line1,
        'streetaddress2': expected.address_line2,
        'streetaddress3': expected.address_line3,
        'state': expected.state_code,
        'city': expected.city_name,
    }

    # can't find it because we're looking at the POP fields
    found = helpers.get_or_create_location(
        row, load_usaspending_contracts.location_mapper_place_of_performance)
    assert found != expected
def test_get_or_create_location_non_usa():
    """We should query different fields if it's a non-US row"""
    expected = mommy.make(
        'references.Location',
        location_country_code='UAE',
        zip5='12345',
        zip_last4='6789',
        # @todo: can't use _fill_optional on this model because data
        # will exceed allowable index length
        address_line1='line one of address',
        address_line2='line two of address',
        address_line3='line thre of address',
        state_code='GG',
        city_name='AAAAAAAA')
    row = {
        'vendorcountrycode': 'UAE',
        'zipcode': '12345-6789',
        'streetaddress': expected.address_line1,
        'streetaddress2': expected.address_line2,
        'streetaddress3': expected.address_line3,
        'state': expected.state_code,
        'city': expected.city_name,
    }

    # can't find it because we're looking at the POP fields
    found = helpers.get_or_create_location(
        row, load_usaspending_contracts.location_mapper_place_of_performance)
    assert found != expected
def get_or_create_recipient(self, row):
    """Return the LegalEntity for this row's DUNS, creating it if absent.

    New entities get a location, name, DUNS, and the flag fields derived
    from the row's recipient_type; an existing entity (matched on DUNS)
    is returned untouched.
    """
    location = h.get_or_create_location(
        row, mapper=location_mapper_fin_assistance_recipient)
    attrs = {
        "location_id": location.location_id,
        "recipient_name": row['recipient_name'],
        "recipient_unique_id": row['duns_no'],
    }
    # assumes recipient_type looks like "<code>: <label>" — TODO confirm;
    # the part after the first colon feeds the flag lookup
    type_label = row.get("recipient_type", ":").split(":")[1].strip()
    attrs.update(self.recipient_flags_by_type(type_label))

    existing = LegalEntity.objects.filter(
        recipient_unique_id=row['duns_no']).first()
    if existing:
        return existing
    return LegalEntity.objects.create(**attrs)
def get_or_create_recipient(self, row):
    """Return the LegalEntity for this row's DUNS, creating it if absent.

    Only a freshly-created entity is populated from the row (location,
    name, and recipient_type-derived flags); an existing one is returned
    as-is.
    """
    location = h.get_or_create_location(
        row, mapper=location_mapper_fin_assistance_recipient)
    attrs = {
        "location_id": location.location_id,
        "recipient_name": row['recipient_name'],
    }
    # assumes recipient_type looks like "<code>: <label>" — TODO confirm
    type_label = row.get("recipient_type", ":").split(":")[1].strip()
    attrs.update(self.recipient_flags_by_type(type_label))

    le, created = LegalEntity.get_or_create_by_duns(duns=row['duns_no'])
    if created:
        # Update from our recipient dictionary
        for field_name, field_value in attrs.items():
            setattr(le, field_name, field_value)
        le.save()
    return le
def load_subawards(submission_attributes, db_cursor):
    """ Loads File F from the broker. db_cursor should be the db_cursor for Broker

    Pulls D1 (subcontract) and D2 (subaward) File F rows for the given
    submission, attaches each to a matching Award, upserts Subaward records,
    and finally refreshes subaward aggregates on every touched Award.
    """
    # A list of award id's to update the subaward accounts and totals on
    award_ids_to_update = set()
    # Get a list of PIIDs from this submission
    awards_for_sub = Award.objects.filter(transaction__submission=submission_attributes).distinct()
    piids = list(awards_for_sub.values_list("piid", flat=True))
    fains = list(awards_for_sub.values_list("fain", flat=True))
    # This allows us to handle an empty list in the SQL without changing the query
    piids.append(None)
    fains.append(None)

    # D1 File F
    db_cursor.execute(D1_FILE_F_QUERY, [submission_attributes.broker_submission_id, tuple(piids)])
    d1_f_data = dictfetchall(db_cursor)
    logger.info("Creating D1 F File Entries (Subcontracts): {}".format(len(d1_f_data)))
    d1_create_count = 0
    d1_update_count = 0
    d1_empty_count = 0

    for row in d1_f_data:
        # A row without a subcontract number can't be keyed; warn only if it
        # carries an amount (data anomaly), otherwise count it as empty.
        if row['subcontract_num'] is None:
            if row['id'] is not None and row['subcontract_amount'] is not None:
                logger.warn("Subcontract of broker id {} has amount, but no number".format(row["id"]))
                logger.warn("Failing row: {}".format(row))
            else:
                d1_empty_count += 1
            continue

        # Get the agency
        agency = get_valid_awarding_agency(row)

        if not agency:
            logger.warn("Subaward number {} cannot find matching agency with toptier code {} and subtier code {}".format(row['subcontract_num'], row['awarding_agency_code'], row['awarding_sub_tier_agency_c']))
            continue

        # Find the award to attach this sub-contract to
        # We perform this lookup by finding the Award containing a transaction with
        # a matching parent award id, piid, and submission attributes
        award = Award.objects.filter(awarding_agency=agency, transaction__submission=submission_attributes, transaction__contract_data__piid=row['piid'], transaction__contract_data__isnull=False, transaction__contract_data__parent_award_id=row['parent_award_id']).distinct().order_by("-date_signed").first()

        # We don't have a matching award for this subcontract, log a warning and continue to the next row
        if not award:
            logger.warn("Subcontract number {} cannot find matching award with piid {}, parent_award_id {}; skipping...".format(row['subcontract_num'], row['piid'], row['parent_award_id']))
            continue

        award_ids_to_update.add(award.id)

        # Find the recipient by looking up by duns; only a newly-created
        # LegalEntity gets populated from the row
        recipient, created = LegalEntity.get_or_create_by_duns(duns=row['duns'])

        if created:
            recipient.parent_recipient_unique_id = row['parent_duns']
            recipient.recipient_name = row['company_name']
            recipient.location = get_or_create_location(row, location_d1_recipient_mapper)
            recipient.save()

        # Get or create POP
        place_of_performance = get_or_create_location(row, pop_mapper)

        d1_f_dict = {
            'award': award,
            'recipient': recipient,
            'submission': submission_attributes,
            'data_source': "DBR",
            'cfda': None,
            'awarding_agency': award.awarding_agency,
            'funding_agency': award.funding_agency,
            'place_of_performance': place_of_performance,
            'subaward_number': row['subcontract_num'],
            'amount': row['subcontract_amount'],
            'description': row['overall_description'],
            'recovery_model_question1': row['recovery_model_q1'],
            'recovery_model_question2': row['recovery_model_q2'],
            'action_date': row['subcontract_date'],
            'award_report_fy_month': row['report_period_mon'],
            'award_report_fy_year': row['report_period_year'],
            'naics': row['naics'],
            'naics_description': row['naics_description'],
        }

        # Create the subaward (idempotent: keyed on subaward_number + award)
        subaward, created = Subaward.objects.update_or_create(subaward_number=row['subcontract_num'], award=award, defaults=d1_f_dict)
        if created:
            d1_create_count += 1
        else:
            d1_update_count += 1

    # D2 File F
    db_cursor.execute(D2_FILE_F_QUERY, [submission_attributes.broker_submission_id, tuple(fains)])
    d2_f_data = dictfetchall(db_cursor)
    logger.info("Creating D2 F File Entries (Subawards): {}".format(len(d2_f_data)))
    d2_create_count = 0
    d2_update_count = 0
    d2_empty_count = 0

    for row in d2_f_data:
        # Same anomaly/empty handling as the D1 loop above
        if row['subaward_num'] is None:
            if row['id'] is not None and row['subaward_amount'] is not None:
                logger.warn("Subcontract of broker id {} has amount, but no number".format(row["id"]))
                logger.warn("Failing row: {}".format(row))
            else:
                d2_empty_count += 1
            continue

        agency = get_valid_awarding_agency(row)

        if not agency:
            logger.warn("Subaward number {} cannot find matching agency with toptier code {} and subtier code {}".format(row['subaward_num'], row['awarding_agency_code'], row['awarding_sub_tier_agency_c']))
            continue

        # Find the award to attach this sub-award to
        # We perform this lookup by finding the Award containing a transaction with
        # a matching fain and submission. If this fails, try submission and uri
        if row['fain'] and len(row['fain']) > 0:
            award = Award.objects.filter(awarding_agency=agency, transaction__submission=submission_attributes, transaction__assistance_data__isnull=False, transaction__assistance_data__fain=row['fain']).distinct().order_by("-date_signed").first()

        # Couldn't find a match on FAIN, try URI if it exists
        if not award and row['uri'] and len(row['uri']) > 0:
            award = Award.objects.filter(awarding_agency=agency, transaction__submission=submission_attributes, transaction__assistance_data__isnull=False, transaction__assistance_data__uri=row['uri']).distinct().first()

        # We don't have a matching award for this subcontract, log a warning and continue to the next row
        if not award:
            logger.warn("Subaward number {} cannot find matching award with fain {}, uri {}; skipping...".format(row['subaward_num'], row['fain'], row['uri']))
            continue

        award_ids_to_update.add(award.id)

        # Find the recipient by looking up by duns
        recipient, created = LegalEntity.get_or_create_by_duns(duns=row['duns'])

        if created:
            # Prefer awardee_name, falling back through the legal name to ""
            recipient_name = row['awardee_name']
            if recipient_name is None:
                recipient_name = row['awardee_or_recipient_legal']
            if recipient_name is None:
                recipient_name = ""

            recipient.recipient_name = recipient_name
            recipient.parent_recipient_unique_id = row['parent_duns']
            recipient.location = get_or_create_location(row, location_d2_recipient_mapper)
            recipient.save()

        # Get or create POP
        place_of_performance = get_or_create_location(row, pop_mapper)

        # Get CFDA Program
        cfda = Cfda.objects.filter(program_number=row['cfda_number']).first()

        d2_f_dict = {
            'award': award,
            'recipient': recipient,
            'submission': submission_attributes,
            'data_source': "DBR",
            'cfda': cfda,
            'awarding_agency': award.awarding_agency,
            'funding_agency': award.funding_agency,
            'place_of_performance': place_of_performance,
            'subaward_number': row['subaward_num'],
            'amount': row['subaward_amount'],
            'description': row['project_description'],
            'recovery_model_question1': row['compensation_q1'],
            'recovery_model_question2': row['compensation_q2'],
            'action_date': row['subaward_date'],
            'award_report_fy_month': row['report_period_mon'],
            'award_report_fy_year': row['report_period_year'],
            'naics': None,
            'naics_description': None,
        }

        # Create the subaward
        subaward, created = Subaward.objects.update_or_create(subaward_number=row['subaward_num'], award=award, defaults=d2_f_dict)
        if created:
            d2_create_count += 1
        else:
            d2_update_count += 1

    # Update Award objects with subaward aggregates
    update_award_subawards(tuple(award_ids_to_update))

    logger.info(
        """Submission {}
           Subcontracts created: {}
           Subcontracts updated: {}
           Empty subcontract rows: {}
           Subawards created: {}
           Subawards updated: {}
           Empty subaward rows: {}""".format(submission_attributes.broker_submission_id,
                                             d1_create_count,
                                             d1_update_count,
                                             d1_empty_count,
                                             d2_create_count,
                                             d2_update_count,
                                             d2_empty_count))
def handle(self, *args, **options):
    """Load a USAspending financial-assistance CSV into Transaction and
    TransactionAssistance records via bulk insert, then refresh award data."""
    h.clear_caches()
    csv_file = options['file'][0]
    self.logger.info("Starting load for file {}".format(csv_file))

    # Create the csv reader
    reader = CsvDataReader(csv_file)

    # Create a new submission attributes object for this timestamp
    subattr = SubmissionAttributes()
    subattr.usaspending_update = datetime.now()
    subattr.save()

    # Create lists to hold model instances for bulk insert
    txn_list = []
    txn_assistance_list = []

    for idx, row in enumerate(reader):
        # NOTE(review): this progress check uses len(reader), not idx —
        # if len(reader) is the (fixed) total row count this either always
        # or never fires; idx % 1000 was probably intended. Confirm against
        # CsvDataReader's semantics before changing.
        if len(reader) % 1000 == 0:
            self.logger.info("Read row {}".format(len(reader)))
        row = h.cleanse_values(row)

        awarding_agency = self.get_awarding_agency(
            row)  # todo: use agency dict?

        # Create the transaction object for this row
        txn_dict = {
            "submission": subattr,
            "action_date": h.convert_date(row['obligation_action_date']),
            "action_type": h.up2colon(row['action_type']),
            "award": self.get_or_create_award(row, awarding_agency=awarding_agency),
            "awarding_agency": awarding_agency,
            "description": row["project_description"],  # ?? account_title is anther contender?
            "data_source": "USA",
            "federal_action_obligation": row["fed_funding_amount"],
            "last_modified_date": h.convert_date(row['last_modified_date']),
            "modification_number": row["federal_award_mod"],  # ??
            "period_of_performance_start_date": h.convert_date(row['starting_date']),
            "period_of_performance_current_end_date": h.convert_date(row['ending_date']),
            "place_of_performance": h.get_or_create_location(
                row, location_mapper_fin_assistance_principal_place),
            "recipient": self.get_or_create_recipient(row),
            "type": h.up2colon(row['assistance_type']),
            "usaspending_unique_transaction_id": row["unique_transaction_id"],
            # ??"funding_agency_id":
            # ?? "certified date":
        }
        txn = Transaction(**txn_dict)
        # fiscal year is derived, not stored in the CSV
        txn.fiscal_year = fy(txn.action_date)
        txn_list.append(txn)

        # Create the transaction contract object for this row
        txn_assistance_dict = {
            "submission": subattr,
            "fain": row["federal_award_id"],
            "uri": row["uri"],
            "cfda": Cfda.objects.filter(
                program_number=row["cfda_program_num"]).first(),
            "correction_late_delete_indicator": h.up2colon(row['correction_late_ind']),
            "face_value_loan_guarantee": row["face_loan_guran"],
            "fiscal_year_and_quarter_correction": row["fyq_correction"],
            "non_federal_funding_amount": row["non_fed_funding_amount"],
            "original_loan_subsidy_cost": row["orig_sub_guran"],  # ??
            "record_type": int(h.up2colon(row['record_type'])),
            "sai_number": row["sai_number"],
            "submitted_type": "C",  # ?? For CSV?
        }
        # ?? business_funds_indicator
        # ?? reporting period start/end??
        txn_assistance = TransactionAssistance(**txn_assistance_dict)
        txn_assistance_list.append(txn_assistance)

    # Bulk insert transaction rows
    self.logger.info(
        "Starting Transaction bulk insert ({} records)".format(
            len(txn_list)))
    Transaction.objects.bulk_create(txn_list)
    self.logger.info("Completed Transaction bulk insert")
    # Update txn assistance list with newly-inserted transactions
    # (the two lists are index-aligned: entry i of each came from CSV row i)
    award_id_list = []  # we'll need this when updating the awards later on
    for idx, t in enumerate(txn_assistance_list):
        t.transaction = txn_list[idx]
        award_id_list.append(txn_list[idx].award_id)

    # Bulk insert transaction assistance rows
    self.logger.info(
        "Starting TransactionAssistance bulk insert ({} records)".format(
            len(txn_assistance_list)))
    TransactionAssistance.objects.bulk_create(txn_assistance_list)
    self.logger.info("Completed TransactionAssistance bulk insert")

    # Update awards to reflect latest transaction information
    # (note that this can't be done via signals or a save()
    # override in the model itself, because those aren't
    # triggered by a bulk update
    self.logger.info("Starting Awards update")
    count = update_awards(tuple(award_id_list))
    update_contract_awards(tuple(award_id_list))
    update_model_description_fields()
    self.logger.info("Completed Awards update ({} records)".format(count))
def load_subawards(submission_attributes, awards_touched, db_cursor):
    """ Loads File F from the broker. db_cursor should be the db_cursor for Broker

    awards_touched is an iterable of Award ids; their piid/fain/uri values
    scope the broker queries. Upserts Subaward rows and refreshes subaward
    aggregates on every Award that matched a File F row.
    """
    # A list of award id's to update the subaward accounts and totals on
    award_ids_to_update = set()

    # Get a list of PIIDs from this submission
    # TODO: URIS
    awards_touched = [Award.objects.filter(id=award_id).first() for award_id in awards_touched]
    piids = list([award.piid for award in awards_touched if award.piid])
    fains = list([award.fain for award in awards_touched if award.fain])
    uris = list([award.uri for award in awards_touched if award.uri])

    # This allows us to handle an empty list in the SQL without changing the query
    piids.append(None)
    fains.append(None)

    # D1 File F
    db_cursor.execute(D1_FILE_F_QUERY, [submission_attributes.broker_submission_id, tuple(piids)])
    d1_f_data = dictfetchall(db_cursor)
    logger.info("Creating D1 F File Entries (Subcontracts): {}".format(len(d1_f_data)))
    d1_create_count = 0
    d1_update_count = 0
    d1_empty_count = 0

    for row in d1_f_data:
        # Rows without a subcontract number can't be keyed; warn only when
        # they carry an amount (data anomaly), else count as empty.
        if row['subcontract_num'] is None:
            if row['id'] is not None and row['subcontract_amount'] is not None:
                logger.warn("Subcontract of broker id {} has amount, but no number".format(row["id"]))
                logger.warn("Failing row: {}".format(row))
            else:
                d1_empty_count += 1
            continue

        # Get the agency
        agency = get_valid_awarding_agency(row)

        if not agency:
            logger.warn(
                "Subaward number {} cannot find matching agency with toptier code {} and subtier code {}".format(
                    row['subcontract_num'], row['awarding_agency_code'], row['awarding_sub_tier_agency_c']))
            continue

        # Find the award to attach this sub-contract to
        # We perform this lookup by finding the Award containing a transaction with a matching parent award id, piid,
        # and submission attributes
        award = Award.objects.filter(
            awarding_agency=agency,
            latest_transaction__contract_data__piid=row['piid'],
            latest_transaction__contract_data__parent_award_id=row['parent_award_id']).distinct().order_by(
            "-date_signed").first()

        # We don't have a matching award for this subcontract, log a warning and continue to the next row
        if not award:
            logger.warn(
                "Subcontract number {} cannot find matching award with piid {}, parent_award_id {}; skipping...".format(
                    row['subcontract_num'], row['piid'], row['parent_award_id']))
            continue

        award_ids_to_update.add(award.id)

        # Get or create unique DUNS-recipient pair; only a freshly-created
        # entity is populated from the row
        recipient, created = LegalEntity.objects.get_or_create(
            recipient_unique_id=row['duns'],
            recipient_name=row['company_name']
        )

        if created:
            recipient.parent_recipient_unique_id = row['parent_duns']
            recipient.location = get_or_create_location(row, location_d1_recipient_mapper)
            recipient.save()

        # Get or create POP
        place_of_performance = get_or_create_location(row, pop_mapper)

        d1_f_dict = {
            'award': award,
            'recipient': recipient,
            'data_source': "DBR",
            'cfda': None,
            'awarding_agency': award.awarding_agency,
            'funding_agency': award.funding_agency,
            'place_of_performance': place_of_performance,
            'subaward_number': row['subcontract_num'],
            'amount': row['subcontract_amount'],
            'description': row['overall_description'],
            'recovery_model_question1': row['recovery_model_q1'],
            'recovery_model_question2': row['recovery_model_q2'],
            'action_date': row['subcontract_date'],
            'award_report_fy_month': row['report_period_mon'],
            'award_report_fy_year': row['report_period_year']
        }

        # Create the subaward (idempotent: keyed on subaward_number + award)
        subaward, created = Subaward.objects.update_or_create(subaward_number=row['subcontract_num'], award=award,
                                                              defaults=d1_f_dict)
        if created:
            d1_create_count += 1
        else:
            d1_update_count += 1

    # D2 File F
    db_cursor.execute(D2_FILE_F_QUERY, [tuple(fains), tuple(uris)])
    d2_f_data = dictfetchall(db_cursor)
    logger.info("Creating D2 F File Entries (Subawards): {}".format(len(d2_f_data)))
    d2_create_count = 0
    d2_update_count = 0
    d2_empty_count = 0

    for row in d2_f_data:
        # Same anomaly/empty handling as the D1 loop above
        if row['subaward_num'] is None:
            if row['id'] is not None and row['subaward_amount'] is not None:
                logger.warn("Subcontract of broker id {} has amount, but no number".format(row["id"]))
                logger.warn("Failing row: {}".format(row))
            else:
                d2_empty_count += 1
            continue

        agency = get_valid_awarding_agency(row)

        if not agency:
            logger.warn("Subaward number {} cannot find matching agency with toptier code {} and subtier "
                        "code {}".format(row['subaward_num'], row['awarding_agency_code'],
                                         row['awarding_sub_tier_agency_c']))
            continue

        # Find the award to attach this sub-award to
        # We perform this lookup by finding the Award containing a transaction with a matching fain and submission.
        # If this fails, try submission and uri
        if row['fain'] and len(row['fain']) > 0:
            award = Award.objects.filter(awarding_agency=agency,
                                         latest_transaction__assistance_data__fain=row['fain']).\
                distinct().order_by("-date_signed").first()

        # Couldn't find a match on FAIN, try URI if it exists
        if not award and row['uri'] and len(row['uri']) > 0:
            award = Award.objects.filter(awarding_agency=agency,
                                         latest_transaction__assistance_data__uri=row['uri']).distinct().first()

        # Try both
        if not award and row['fain'] and len(row['fain']) > 0 and row['uri'] and len(row['uri']) > 0:
            award = Award.objects.filter(awarding_agency=agency,
                                         latest_transaction__assistance_data__fain=row['fain'],
                                         latest_transaction__assistance_data__uri=row['uri']).\
                distinct().order_by("-date_signed").first()

        # We don't have a matching award for this subcontract, log a warning and continue to the next row
        if not award:
            logger.warn("Subaward number {} cannot find matching award with fain {}, uri {}; "
                        "skipping...".format(row['subaward_num'], row['fain'], row['uri']))
            continue

        award_ids_to_update.add(award.id)

        # Prefer awardee_name, falling back through the legal name to ""
        recipient_name = row['awardee_name']
        if recipient_name is None:
            recipient_name = row['awardee_or_recipient_legal']
        if recipient_name is None:
            recipient_name = ""

        # Get or create unique DUNS-recipient pair
        recipient, created = LegalEntity.objects.get_or_create(
            recipient_unique_id=row['duns'],
            recipient_name=recipient_name
        )

        if created:
            recipient.parent_recipient_unique_id = row['parent_duns']
            recipient.location = get_or_create_location(row, location_d2_recipient_mapper)
            recipient.save()

        # Get or create POP
        place_of_performance = get_or_create_location(row, pop_mapper)

        # Get CFDA Program
        cfda = Cfda.objects.filter(program_number=row['cfda_number']).first()

        d2_f_dict = {
            'award': award,
            'recipient': recipient,
            'data_source': "DBR",
            'cfda': cfda,
            'awarding_agency': award.awarding_agency,
            'funding_agency': award.funding_agency,
            'place_of_performance': place_of_performance,
            'subaward_number': row['subaward_num'],
            'amount': row['subaward_amount'],
            'description': row['project_description'],
            'recovery_model_question1': row['compensation_q1'],
            'recovery_model_question2': row['compensation_q2'],
            'action_date': row['subaward_date'],
            'award_report_fy_month': row['report_period_mon'],
            'award_report_fy_year': row['report_period_year']
        }

        # Create the subaward
        subaward, created = Subaward.objects.update_or_create(
            subaward_number=row['subaward_num'], award=award,
            defaults=d2_f_dict
        )
        if created:
            d2_create_count += 1
        else:
            d2_update_count += 1

    # Update Award objects with subaward aggregates
    update_award_subawards(tuple(award_ids_to_update))

    logger.info(
        """Submission {}
           Subcontracts created: {}
           Subcontracts updated: {}
           Empty subcontract rows: {}
           Subawards created: {}
           Subawards updated: {}
           Empty subaward rows: {}""".format(submission_attributes.broker_submission_id,
                                             d1_create_count,
                                             d1_update_count,
                                             d1_empty_count,
                                             d2_create_count,
                                             d2_update_count,
                                             d2_empty_count))
def handle(self, *args, **options):
    """Load a USAspending contracts CSV into Transaction and
    TransactionContract records via bulk insert, then refresh award data."""
    csv_file = options['file'][0]
    self.logger.info("Starting load for file {}".format(csv_file))

    # Create the csv reader
    reader = CsvDataReader(csv_file)

    # Create a new submission attributes object for this timestamp
    subattr = SubmissionAttributes()
    subattr.usaspending_update = datetime.now()
    subattr.save()

    # Create lists to hold model instances for bulk insert
    txn_list = []
    txn_contract_list = []

    subtier_agency_dict = h.get_subtier_agency_dict()

    # Store some additional support data needed for the load
    award_type_dict = {a[0]: a[1] for a in AWARD_TYPES}
    contract_pricing_dict = {c[0]: c[1] for c in CONTRACT_PRICING_TYPES}

    for idx, row in enumerate(reader):
        # NOTE(review): progress check uses len(reader), not idx — if
        # len(reader) is a fixed total this either always or never logs;
        # idx % 1000 was probably intended. Confirm before changing.
        if len(reader) % 1000 == 0:
            self.logger.info("Read row {}".format(len(reader)))
        row = h.cleanse_values(row)

        awarding_agency_id = self.get_agency_id(row["contractingofficeagencyid"], subtier_agency_dict)

        # Create the transaction object for this row
        txn_dict = {
            "action_date": h.convert_date(row['signeddate']),
            "award": self.get_or_create_award(row, awarding_agency_id),
            "awarding_agency_id": awarding_agency_id,
            "data_source": "USA",
            "description": row["descriptionofcontractrequirement"],
            "federal_action_obligation": row["dollarsobligated"],
            "funding_agency_id": self.get_agency_id(row["fundingrequestingagencyid"], subtier_agency_dict),
            "last_modified_date": h.convert_date(row['last_modified_date']),
            "modification_number": row["modnumber"],
            "place_of_performance": h.get_or_create_location(
                row, mapper=location_mapper_place_of_performance),
            "period_of_performance_current_end_date": h.convert_date(row['currentcompletiondate']),
            "period_of_performance_start_date": h.convert_date(row['effectivedate']),
            "recipient": self.get_or_create_recipient(row),
            "submission": subattr,
            "type": evaluate_contract_award_type(row),
            "type_description": award_type_dict.get(evaluate_contract_award_type(row)),
            "usaspending_unique_transaction_id": row["unique_transaction_id"]
        }
        txn = Transaction(**txn_dict)
        txn_list.append(txn)

        # Create the transaction contract object for this row
        txn_contract_dict = {
            "submission": subattr,
            "piid": row['piid'],
            "parent_award_id": row['idvpiid'],
            "current_total_value_award": h.parse_numeric_value(row["baseandexercisedoptionsvalue"]),
            "period_of_performance_potential_end_date": h.convert_date(row['ultimatecompletiondate']),
            "potential_total_value_of_award": h.parse_numeric_value(row["baseandalloptionsvalue"]),
            "epa_designated_product": self.parse_first_character(row['useofepadesignatedproducts']),
            "gfe_gfp": h.up2colon(row['gfe_gfp']),
            "cost_or_pricing_data": h.up2colon(row['costorpricingdata']),
            "type_of_contract_pricing": h.up2colon(row['typeofcontractpricing']),
            "type_of_contract_pricing_description": contract_pricing_dict.get(h.up2colon(row['typeofcontractpricing'])),
            "multiple_or_single_award_idv": h.up2colon(row['multipleorsingleawardidc']),
            # NOTE(review): "naics" reads nationalinterestactioncode, same
            # source as "national_interest_action" below — looks like it
            # should read the CSV's NAICS column; verify against the source
            # data dictionary before changing.
            "naics": h.up2colon(row['nationalinterestactioncode']),
            "dod_claimant_program_code": h.up2colon(row['claimantprogramcode']),
            "commercial_item_acquisition_procedures": h.up2colon(
                row['commercialitemacquisitionprocedures']),
            "commercial_item_test_program": h.up2colon(row['commercialitemtestprogram']),
            "consolidated_contract": h.up2colon(row['consolidatedcontract']),
            "contingency_humanitarian_or_peacekeeping_operation": h.up2colon(
                row['contingencyhumanitarianpeacekeepingoperation']),
            "contract_bundling": h.up2colon(row['contractbundling']),
            "contract_financing": h.up2colon(row['contractfinancing']),
            "contracting_officers_determination_of_business_size": h.up2colon(
                row['contractingofficerbusinesssizedetermination']),
            "country_of_product_or_service_origin": h.up2colon(row['countryoforigin']),
            "davis_bacon_act": h.up2colon(row['davisbaconact']),
            "evaluated_preference": h.up2colon(row['evaluatedpreference']),
            "extent_competed": h.up2colon(row['extentcompeted']),
            "information_technology_commercial_item_category": h.up2colon(
                row['informationtechnologycommercialitemcategory']),
            "interagency_contracting_authority": h.up2colon(row['interagencycontractingauthority']),
            "local_area_set_aside": h.up2colon(row['localareasetaside']),
            "purchase_card_as_payment_method": h.up2colon(row['purchasecardaspaymentmethod']),
            "multi_year_contract": h.up2colon(row['multiyearcontract']),
            "national_interest_action": h.up2colon(row['nationalinterestactioncode']),
            "number_of_actions": h.up2colon(row['numberofactions']),
            "number_of_offers_received": h.up2colon(row['numberofoffersreceived']),
            "performance_based_service_acquisition": h.up2colon(row['performancebasedservicecontract']),
            "place_of_manufacture": h.up2colon(row['placeofmanufacture']),
            "product_or_service_code": h.up2colon(row['productorservicecode']),
            "recovered_materials_sustainability": h.up2colon(row['recoveredmaterialclauses']),
            "research": h.up2colon(row['research']),
            "sea_transportation": h.up2colon(row['seatransportation']),
            "service_contract_act": h.up2colon(row['servicecontractact']),
            "small_business_competitiveness_demonstration_program": self.parse_first_character(
                row['smallbusinesscompetitivenessdemonstrationprogram']),
            "solicitation_procedures": h.up2colon(row['solicitationprocedures']),
            "subcontracting_plan": h.up2colon(row['subcontractplan']),
            "type_set_aside": h.up2colon(row['typeofsetaside']),
            "walsh_healey_act": h.up2colon(row['walshhealyact']),
            "rec_flag": self.parse_first_character(h.up2colon(row['rec_flag'])),
            "type_of_idc": self.parse_first_character(row['typeofidc']),
            "a76_fair_act_action": self.parse_first_character(row['a76action']),
            "clinger_cohen_act_planning": self.parse_first_character(row['clingercohenact']),
            "cost_accounting_standards": self.parse_first_character(
                row['costaccountingstandardsclause']),
            "fed_biz_opps": self.parse_first_character(row['fedbizopps']),
            "foreign_funding": self.parse_first_character(row['fundedbyforeignentity']),
            "major_program": self.parse_first_character(row['majorprogramcode']),
            "program_acronym": self.parse_first_character(row['programacronym']),
            "referenced_idv_modification_number": self.parse_first_character(
                row['idvmodificationnumber']),
            "transaction_number": self.parse_first_character(row['transactionnumber']),
            "solicitation_identifier": self.parse_first_character(row['solicitationid'])
        }
        txn_contract = TransactionContract(**txn_contract_dict)
        txn_contract_list.append(txn_contract)

    # Bulk insert transaction rows
    self.logger.info("Starting Transaction bulk insert ({} records)".format(len(txn_list)))
    Transaction.objects.bulk_create(txn_list)
    self.logger.info("Completed Transaction bulk insert")
    # Update txn contract list with newly-inserted transactions
    # (the two lists are index-aligned: entry i of each came from CSV row i)
    award_id_list = []  # we'll need this when updating the awards later on
    for idx, t in enumerate(txn_contract_list):
        # add transaction info to this TransactionContract object
        t.transaction = txn_list[idx]
        # add the corresponding award id to a list we'll use when batch-updating award data
        award_id_list.append(txn_list[idx].award_id)

    # Bulk insert transaction contract rows
    self.logger.info("Starting TransactionContract bulk insert ({} records)".format(len(txn_contract_list)))
    TransactionContract.objects.bulk_create(txn_contract_list)
    self.logger.info("Completed TransactionContract bulk insert")

    # Update awards to reflect latest transaction information
    # (note that this can't be done via signals or a save()
    # override in the model itself, because those aren't
    # triggered by a bulk update
    self.logger.info("Starting Awards update")
    count = update_awards(tuple(award_id_list))
    update_contract_awards(tuple(award_id_list))
    self.logger.info("Completed Awards update ({} records)".format(count))
def get_or_create_recipient(self, row):
    """Return the LegalEntity (recipient) for a raw USAspending contract row.

    Looks up an existing LegalEntity by the row's DUNS number. If none
    exists, a new one is created from the row's vendor fields; if one does
    exist, it is updated in place with the latest row data and the refreshed
    instance is returned.

    Args:
        row: dict of raw USAspending CSV columns (lowercase header names).
            Also used to resolve/create the recipient's Location via
            ``h.get_or_create_location``.

    Returns:
        LegalEntity: the created or updated recipient record.
    """
    recipient_dict = {
        # Resolve (or create) the vendor location first; only its id is stored.
        "location_id": h.get_or_create_location(
            row, mapper=location_mapper_vendor).location_id,
        "recipient_name": row['vendorname'],
        "vendor_phone_number": row['phoneno'],
        "vendor_fax_number": row['faxno'],
        "recipient_unique_id": row['dunsnumber'],
        "parent_recipient_unique_id": row['parentdunsnumber'],
        # Boolean-ish source flags arrive as strings; we keep only the first
        # character (e.g. "true: ..." -> "t").
        "city_local_government":
        self.parse_first_character(row['iscitylocalgovernment']),
        "county_local_government":
        self.parse_first_character(row['iscountylocalgovernment']),
        "inter_municipal_local_government":
        self.parse_first_character(row['isintermunicipallocalgovernment']),
        "local_government_owned":
        self.parse_first_character(row['islocalgovernmentowned']),
        "municipality_local_government":
        self.parse_first_character(row['ismunicipalitylocalgovernment']),
        "school_district_local_government":
        self.parse_first_character(row['isschooldistrictlocalgovernment']),
        "township_local_government":
        self.parse_first_character(row['istownshiplocalgovernment']),
        "federal_agency":
        self.parse_first_character(row['isfederalgovernmentagency']),
        "us_federal_government":
        self.parse_first_character(row['federalgovernmentflag']),
        "federally_funded_research_and_development_corp":
        self.parse_first_character(
            row['isfederallyfundedresearchanddevelopmentcorp']),
        # NOTE(review): 'istriballyownedfirm' also feeds
        # tribally_owned_business below — confirm both mappings are intended.
        "us_tribal_government":
        self.parse_first_character(row['istriballyownedfirm']),
        "c8a_program_participant":
        self.parse_first_character(row['firm8aflag']),
        "foreign_government":
        self.parse_first_character(row['isforeigngovernment']),
        "community_developed_corporation_owned_firm":
        self.parse_first_character(
            row['iscommunitydevelopedcorporationownedfirm']),
        "labor_surplus_area_firm":
        self.parse_first_character(row['islaborsurplusareafirm']),
        "small_agricultural_cooperative":
        self.parse_first_character(row['issmallagriculturalcooperative']),
        "international_organization":
        self.parse_first_character(row['isinternationalorganization']),
        "c1862_land_grant_college":
        self.parse_first_character(row['is1862landgrantcollege']),
        "c1890_land_grant_college":
        self.parse_first_character(row['is1890landgrantcollege']),
        "c1994_land_grant_college":
        self.parse_first_character(row['is1994landgrantcollege']),
        "minority_institution":
        self.parse_first_character(row['minorityinstitutionflag']),
        "private_university_or_college":
        self.parse_first_character(row['isprivateuniversityorcollege']),
        "school_of_forestry":
        self.parse_first_character(row['isschoolofforestry']),
        "state_controlled_institution_of_higher_learning":
        self.parse_first_character(
            row['isstatecontrolledinstitutionofhigherlearning']),
        "tribal_college":
        self.parse_first_character(row['istribalcollege']),
        "veterinary_college":
        self.parse_first_character(row['isveterinarycollege']),
        "educational_institution":
        self.parse_first_character(row['educationalinstitutionflag']),
        # NOTE(review): the next two reuse the "owned corporation/firm"
        # source columns for the "servicing institution" fields — confirm.
        "alaskan_native_servicing_institution":
        self.parse_first_character(
            row['isalaskannativeownedcorporationorfirm']),
        "community_development_corporation":
        self.parse_first_character(row['iscommunitydevelopmentcorporation']),
        "native_hawaiian_servicing_institution":
        self.parse_first_character(
            row['isnativehawaiianownedorganizationorfirm']),
        "domestic_shelter":
        self.parse_first_character(row['isdomesticshelter']),
        "manufacturer_of_goods":
        self.parse_first_character(row['ismanufacturerofgoods']),
        "hospital_flag":
        self.parse_first_character(row['hospitalflag']),
        "veterinary_hospital":
        self.parse_first_character(row['isveterinaryhospital']),
        "hispanic_servicing_institution":
        self.parse_first_character(row['ishispanicservicinginstitution']),
        "woman_owned_business":
        self.parse_first_character(row['womenownedflag']),
        "minority_owned_business":
        self.parse_first_character(row['minorityownedbusinessflag']),
        "women_owned_small_business":
        self.parse_first_character(row['iswomenownedsmallbusiness']),
        "economically_disadvantaged_women_owned_small_business":
        self.parse_first_character(
            row['isecondisadvwomenownedsmallbusiness']),
        "joint_venture_economic_disadvantaged_women_owned_small_bus":
        self.parse_first_character(
            row['isjointventureecondisadvwomenownedsmallbusiness']),
        "veteran_owned_business":
        self.parse_first_character(row['veteranownedflag']),
        "airport_authority":
        self.parse_first_character(row['isairportauthority']),
        "housing_authorities_public_tribal":
        self.parse_first_character(row['ishousingauthoritiespublicortribal']),
        "interstate_entity":
        self.parse_first_character(row['isinterstateentity']),
        "planning_commission":
        self.parse_first_character(row['isplanningcommission']),
        "port_authority":
        self.parse_first_character(row['isportauthority']),
        "transit_authority":
        self.parse_first_character(row['istransitauthority']),
        "foreign_owned_and_located":
        self.parse_first_character(row['isforeignownedandlocated']),
        "american_indian_owned_business":
        self.parse_first_character(row['aiobflag']),
        "alaskan_native_owned_corporation_or_firm":
        self.parse_first_character(
            row['isalaskannativeownedcorporationorfirm']),
        "indian_tribe_federally_recognized":
        self.parse_first_character(row['isindiantribe']),
        "native_hawaiian_owned_business":
        self.parse_first_character(
            row['isnativehawaiianownedorganizationorfirm']),
        "tribally_owned_business":
        self.parse_first_character(row['istriballyownedfirm']),
        "asian_pacific_american_owned_business":
        self.parse_first_character(row['apaobflag']),
        "black_american_owned_business":
        self.parse_first_character(row['baobflag']),
        "hispanic_american_owned_business":
        self.parse_first_character(row['haobflag']),
        "native_american_owned_business":
        self.parse_first_character(row['naobflag']),
        "historically_black_college":
        self.parse_first_character(row['hbcuflag']),
        "subcontinent_asian_asian_indian_american_owned_business":
        self.parse_first_character(row['saaobflag']),
        # NOTE(review): 'islocalgovernmentowned' also feeds
        # local_government_owned above — confirm both mappings are intended.
        "us_local_government":
        self.parse_first_character(row['islocalgovernmentowned']),
        "division_name":
        self.parse_first_character(row['divisionname']),
        "division_number":
        self.parse_first_character(row['divisionnumberorofficecode']),
        "historically_underutilized_business_zone":
        self.parse_first_character(row['hubzoneflag']),
        "corporate_entity_tax_exempt":
        self.parse_first_character(row['iscorporateentitytaxexempt']),
        "corporate_entity_not_tax_exempt":
        self.parse_first_character(row['iscorporateentitynottaxexempt']),
        "council_of_governments":
        self.parse_first_character(row['iscouncilofgovernments']),
        "dot_certified_disadvantage":
        self.parse_first_character(
            row['isdotcertifieddisadvantagedbusinessenterprise']),
        "for_profit_organization":
        self.parse_first_character(row['isforprofitorganization']),
        "foundation":
        self.parse_first_character(row['isfoundation']),
        "joint_venture_women_owned_small_business":
        self.parse_first_character(
            row['isjointventurewomenownedsmallbusiness']),
        "limited_liability_corporation":
        self.parse_first_character(row['islimitedliabilitycorporation']),
        "other_not_for_profit_organization":
        self.parse_first_character(row['isothernotforprofitorganization']),
        "other_minority_owned_business":
        self.parse_first_character(row['isotherminorityowned']),
        "partnership_or_limited_liability_partnership":
        self.parse_first_character(
            row['ispartnershiporlimitedliabilitypartnership']),
        "sole_proprietorship":
        self.parse_first_character(row['issoleproprietorship']),
        "subchapter_scorporation":
        self.parse_first_character(row['issubchapterscorporation']),
        "nonprofit_organization":
        self.parse_first_character(row['nonprofitorganizationflag']),
    }

    le = LegalEntity.objects.filter(
        recipient_unique_id=row['dunsnumber']).first()
    if le is None:  # .first() contract is instance-or-None; be explicit
        le = LegalEntity.objects.create(**recipient_dict)
    else:
        # Queryset-level update() bypasses the in-memory instance (and any
        # save() signals), so refresh `le` before returning it — otherwise
        # callers would see pre-update attribute values.
        LegalEntity.objects.filter(
            legal_entity_id=le.legal_entity_id).update(**recipient_dict)
        le.refresh_from_db()
    return le