def load_file_d2(submission_attributes, award_financial_assistance_data, db_cursor): """ Process and load file D2 broker data (financial assistance award txns). """ legal_entity_location_field_map = { "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "city_code": "legal_entity_city_code", "city_name": "legal_entity_city_name", "congressional_code": "legal_entity_congressional", "county_code": "legal_entity_county_code", "county_name": "legal_entity_county_name", "foreign_city_name": "legal_entity_foreign_city", "foreign_postal_code": "legal_entity_foreign_posta", "foreign_province": "legal_entity_foreign_provi", "state_code": "legal_entity_state_code", "state_name": "legal_entity_state_name", "zip5": "legal_entity_zip5", "zip_last4": "legal_entity_zip_last4", "location_country_code": "legal_entity_country_code" } place_of_performance_field_map = { "city_name": "place_of_performance_city", "performance_code": "place_of_performance_code", "congressional_code": "place_of_performance_congr", "county_name": "place_of_perform_county_na", "foreign_location_description": "place_of_performance_forei", "state_name": "place_of_perform_state_nam", "zip4": "place_of_performance_zip4a", "location_country_code": "place_of_perform_country_c" } legal_entity_location_value_map = {"recipient_flag": True} place_of_performance_value_map = {"place_of_performance_flag": True} fad_field_map = { "type": "assistance_type", "description": "award_description", } for row in award_financial_assistance_data: legal_entity_location, created = get_or_create_location( legal_entity_location_field_map, row, legal_entity_location_value_map) # Create the legal entity if it doesn't exist try: legal_entity = LegalEntity.objects.get( recipient_unique_id=row['awardee_or_recipient_uniqu']) except ObjectDoesNotExist: legal_entity_value_map = { "location": legal_entity_location, "legal_entity_id": row['awardee_or_recipient_uniqu'] } legal_entity = load_data_into_model( LegalEntity(), row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location, created = get_or_create_location( place_of_performance_field_map, row, place_of_performance_value_map) # If toptier agency code (aka CGAC) is not supplied on the D2 record, # use the sub tier code to look it up if row['awarding_agency_code'] is None: row['awarding_agency_code'] = Agency.get_by_subtier( row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code if row['funding_agency_code'] is None: row['funding_agency_code'] = Agency.get_by_subtier( row["funding_sub_tier_agency_co"]).toptier_agency.cgac_code # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"]) created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, piid=row.get('piid'), fain=row.get('fain'), uri=row.get('uri'), parent_award_id=row.get('parent_award_id')) award.save() AWARD_UPDATE_ID_LIST.append(award.id) parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'], row["funding_sub_tier_agency_co"]), "recipient": legal_entity, "place_of_performance": pop_location, 'submission': submission_attributes, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), } transaction_instance = load_data_into_model( Transaction(), row, field_map=fad_field_map, value_map=parent_txn_value_map, as_dict=True) transaction_instance, created = Transaction.objects.get_or_create( **transaction_instance) fad_value_map = { "transaction": transaction_instance, "submission": submission_attributes, "cfda": CFDAProgram.objects.filter( program_number=row['cfda_number']).first(), 'reporting_period_start': submission_attributes.reporting_period_start, 'reporting_period_end': submission_attributes.reporting_period_end, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), } financial_assistance_data = load_data_into_model( TransactionAssistance(), row, field_map=fad_field_map, value_map=fad_value_map, save=True)
def load_file_d1(submission_attributes, procurement_data, db_cursor): """ Process and load file D1 broker data (contract award txns). """ legal_entity_location_field_map = { "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "location_country_code": "legal_entity_country_code", "city_name": "legal_entity_city_name", "congressional_code": "legal_entity_congressional", "state_code": "legal_entity_state_code", "zip4": "legal_entity_zip4" } place_of_performance_field_map = { # not sure place_of_performance_locat maps exactly to city name "city_name": "place_of_performance_locat", "congressional_code": "place_of_performance_congr", "state_code": "place_of_performance_state", "zip4": "place_of_performance_zip4a", "location_country_code": "place_of_perform_country_c" } place_of_performance_value_map = {"place_of_performance_flag": True} legal_entity_location_value_map = {"recipient_flag": True} contract_field_map = { "type": "contract_award_type", "description": "award_description" } for row in procurement_data: legal_entity_location, created = get_or_create_location( legal_entity_location_field_map, row, legal_entity_location_value_map) # Create the legal entity if it doesn't exist try: legal_entity = LegalEntity.objects.get( recipient_unique_id=row['awardee_or_recipient_uniqu']) except ObjectDoesNotExist: legal_entity_value_map = { "location": legal_entity_location, "legal_entity_id": row['awardee_or_recipient_uniqu'], } legal_entity = load_data_into_model( LegalEntity(), row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location, created = get_or_create_location( place_of_performance_field_map, row, place_of_performance_value_map) # If awarding/funding toptier agency code (aka CGAC) is not supplied on the D1 record, # use the sub tier code to look it up if row['awarding_agency_code'] is None: row['awarding_agency_code'] = Agency.get_by_subtier( row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code if row['funding_agency_code'] is None: row['funding_agency_code'] = Agency.get_by_subtier( row["funding_sub_tier_agency_co"]).toptier_agency.cgac_code # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"]) created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, piid=row.get('piid'), fain=row.get('fain'), uri=row.get('uri'), parent_award_id=row.get('parent_award_id')) award.save() AWARD_UPDATE_ID_LIST.append(award.id) AWARD_CONTRACT_UPDATE_ID_LIST.append(award.id) parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'], row["funding_sub_tier_agency_co"]), "recipient": legal_entity, "place_of_performance": pop_location, 'submission': submission_attributes, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), } transaction_instance = load_data_into_model( Transaction(), row, field_map=contract_field_map, value_map=parent_txn_value_map, as_dict=True) transaction_instance, created = Transaction.objects.get_or_create( **transaction_instance) contract_value_map = { 'transaction': transaction_instance, 'submission': submission_attributes, 'reporting_period_start': submission_attributes.reporting_period_start, 'reporting_period_end': submission_attributes.reporting_period_end, "period_of_performance_potential_end_date": format_date(row['period_of_perf_potential_e']) } contract_instance = load_data_into_model(TransactionContract(), row, field_map=contract_field_map, value_map=contract_value_map, save=True)
def handle(self, *args, **options): csv_file = options['file'][0] self.logger.info("Starting load for file {}".format(csv_file)) # Create the csv reader reader = CsvDataReader(csv_file) # Create a new submission attributes object for this timestamp subattr = SubmissionAttributes() subattr.usaspending_update = datetime.now() subattr.save() # Create lists to hold model instances for bulk insert txn_list = [] txn_contract_list = [] subtier_agency_dict = h.get_subtier_agency_dict() # Store some additional support data needed for the laod award_type_dict = {a[0]: a[1] for a in AWARD_TYPES} contract_pricing_dict = {c[0]: c[1] for c in CONTRACT_PRICING_TYPES} for idx, row in enumerate(reader): if len(reader) % 1000 == 0: self.logger.info("Read row {}".format(len(reader))) row = h.cleanse_values(row) awarding_agency_id = self.get_agency_id(row["contractingofficeagencyid"], subtier_agency_dict) # Create the transaction object for this row txn_dict = { "action_date": h.convert_date(row['signeddate']), "award": self.get_or_create_award(row, awarding_agency_id), "awarding_agency_id": awarding_agency_id, "data_source": "USA", "description": row["descriptionofcontractrequirement"], "federal_action_obligation": row["dollarsobligated"], "funding_agency_id": self.get_agency_id(row["fundingrequestingagencyid"], subtier_agency_dict), "last_modified_date": h.convert_date(row['last_modified_date']), "modification_number": row["modnumber"], "place_of_performance": h.get_or_create_location( row, mapper=location_mapper_place_of_performance), "period_of_performance_current_end_date": h.convert_date(row['currentcompletiondate']), "period_of_performance_start_date": h.convert_date(row['effectivedate']), "recipient": self.get_or_create_recipient(row), "submission": subattr, "type": evaluate_contract_award_type(row), "type_description": award_type_dict.get(evaluate_contract_award_type(row)), "usaspending_unique_transaction_id": row["unique_transaction_id"] } txn = Transaction(**txn_dict) txn_list.append(txn) # Create the transaction contract object for this row txn_contract_dict = { "submission": subattr, "piid": row['piid'], "parent_award_id": row['idvpiid'], "current_total_value_award": h.parse_numeric_value(row["baseandexercisedoptionsvalue"]), "period_of_performance_potential_end_date": h.convert_date(row['ultimatecompletiondate']), "potential_total_value_of_award": h.parse_numeric_value(row["baseandalloptionsvalue"]), "epa_designated_product": self.parse_first_character(row['useofepadesignatedproducts']), "gfe_gfp": h.up2colon(row['gfe_gfp']), "cost_or_pricing_data": h.up2colon(row['costorpricingdata']), "type_of_contract_pricing": h.up2colon(row['typeofcontractpricing']), "type_of_contract_pricing_description": contract_pricing_dict.get(h.up2colon(row['typeofcontractpricing'])), "multiple_or_single_award_idv": h.up2colon(row['multipleorsingleawardidc']), "naics": h.up2colon(row['nationalinterestactioncode']), "dod_claimant_program_code": h.up2colon(row['claimantprogramcode']), "commercial_item_acquisition_procedures": h.up2colon( row['commercialitemacquisitionprocedures']), "commercial_item_test_program": h.up2colon(row['commercialitemtestprogram']), "consolidated_contract": h.up2colon(row['consolidatedcontract']), "contingency_humanitarian_or_peacekeeping_operation": h.up2colon( row['contingencyhumanitarianpeacekeepingoperation']), "contract_bundling": h.up2colon(row['contractbundling']), "contract_financing": h.up2colon(row['contractfinancing']), "contracting_officers_determination_of_business_size": h.up2colon( row['contractingofficerbusinesssizedetermination']), "country_of_product_or_service_origin": h.up2colon(row['countryoforigin']), "davis_bacon_act": h.up2colon(row['davisbaconact']), "evaluated_preference": h.up2colon(row['evaluatedpreference']), "extent_competed": h.up2colon(row['extentcompeted']), "information_technology_commercial_item_category": h.up2colon( row['informationtechnologycommercialitemcategory']), "interagency_contracting_authority": h.up2colon(row['interagencycontractingauthority']), "local_area_set_aside": h.up2colon(row['localareasetaside']), "purchase_card_as_payment_method": h.up2colon(row['purchasecardaspaymentmethod']), "multi_year_contract": h.up2colon(row['multiyearcontract']), "national_interest_action": h.up2colon(row['nationalinterestactioncode']), "number_of_actions": h.up2colon(row['numberofactions']), "number_of_offers_received": h.up2colon(row['numberofoffersreceived']), "performance_based_service_acquisition": h.up2colon(row['performancebasedservicecontract']), "place_of_manufacture": h.up2colon(row['placeofmanufacture']), "product_or_service_code": h.up2colon(row['productorservicecode']), "recovered_materials_sustainability": h.up2colon(row['recoveredmaterialclauses']), "research": h.up2colon(row['research']), "sea_transportation": h.up2colon(row['seatransportation']), "service_contract_act": h.up2colon(row['servicecontractact']), "small_business_competitiveness_demonstration_program": self.parse_first_character( row['smallbusinesscompetitivenessdemonstrationprogram']), "solicitation_procedures": h.up2colon(row['solicitationprocedures']), "subcontracting_plan": h.up2colon(row['subcontractplan']), "type_set_aside": h.up2colon(row['typeofsetaside']), "walsh_healey_act": h.up2colon(row['walshhealyact']), "rec_flag": self.parse_first_character(h.up2colon(row['rec_flag'])), "type_of_idc": self.parse_first_character(row['typeofidc']), "a76_fair_act_action": self.parse_first_character(row['a76action']), "clinger_cohen_act_planning": self.parse_first_character(row['clingercohenact']), "cost_accounting_standards": self.parse_first_character( row['costaccountingstandardsclause']), "fed_biz_opps": self.parse_first_character(row['fedbizopps']), "foreign_funding": self.parse_first_character(row['fundedbyforeignentity']), "major_program": self.parse_first_character(row['majorprogramcode']), "program_acronym": self.parse_first_character(row['programacronym']), "referenced_idv_modification_number": self.parse_first_character( row['idvmodificationnumber']), "transaction_number": self.parse_first_character(row['transactionnumber']), "solicitation_identifier": self.parse_first_character(row['solicitationid']) } txn_contract = TransactionContract(**txn_contract_dict) txn_contract_list.append(txn_contract) # Bulk insert transaction rows self.logger.info("Starting Transaction bulk insert ({} records)".format(len(txn_list))) Transaction.objects.bulk_create(txn_list) self.logger.info("Completed Transaction bulk insert") # Update txn contract list with newly-inserted transactions award_id_list = [] # we'll need this when updating the awards later on for idx, t in enumerate(txn_contract_list): # add transaction info to this TransactionContract object t.transaction = txn_list[idx] # add the corresponding award id to a list we'll use when batch-updating award data award_id_list.append(txn_list[idx].award_id) # Bulk insert transaction contract rows self.logger.info("Starting TransactionContract bulk insert ({} records)".format(len(txn_contract_list))) TransactionContract.objects.bulk_create(txn_contract_list) self.logger.info("Completed TransactionContract bulk insert") # Update awards to reflect latest transaction information # (note that this can't be done via signals or a save() # override in the model itself, because those aren't # triggered by a bulk update self.logger.info("Starting Awards update") count = update_awards(tuple(award_id_list)) update_contract_awards(tuple(award_id_list)) self.logger.info("Completed Awards update ({} records)".format(count))
def handle(self, *args, **options): h.clear_caches() csv_file = options['file'][0] self.logger.info("Starting load for file {}".format(csv_file)) # Create the csv reader reader = CsvDataReader(csv_file) # Create a new submission attributes object for this timestamp subattr = SubmissionAttributes() subattr.usaspending_update = datetime.now() subattr.save() # Create lists to hold model instances for bulk insert txn_list = [] txn_assistance_list = [] for idx, row in enumerate(reader): if len(reader) % 1000 == 0: self.logger.info("Read row {}".format(len(reader))) row = h.cleanse_values(row) awarding_agency = self.get_awarding_agency( row) # todo: use agency dict? # Create the transaction object for this row txn_dict = { "submission": subattr, "action_date": h.convert_date(row['obligation_action_date']), "action_type": h.up2colon(row['action_type']), "award": self.get_or_create_award(row, awarding_agency=awarding_agency), "awarding_agency": awarding_agency, "description": row["project_description"], # ?? account_title is anther contender? "data_source": "USA", "federal_action_obligation": row["fed_funding_amount"], "last_modified_date": h.convert_date(row['last_modified_date']), "modification_number": row["federal_award_mod"], # ?? "period_of_performance_start_date": h.convert_date(row['starting_date']), "period_of_performance_current_end_date": h.convert_date(row['ending_date']), "place_of_performance": h.get_or_create_location( row, location_mapper_fin_assistance_principal_place), "recipient": self.get_or_create_recipient(row), "type": h.up2colon(row['assistance_type']), "usaspending_unique_transaction_id": row["unique_transaction_id"], # ??"funding_agency_id": # ?? "certified date": } txn = Transaction(**txn_dict) txn.fiscal_year = fy(txn.action_date) txn_list.append(txn) # Create the transaction contract object for this row txn_assistance_dict = { "submission": subattr, "fain": row["federal_award_id"], "uri": row["uri"], "cfda": Cfda.objects.filter( program_number=row["cfda_program_num"]).first(), "correction_late_delete_indicator": h.up2colon(row['correction_late_ind']), "face_value_loan_guarantee": row["face_loan_guran"], "fiscal_year_and_quarter_correction": row["fyq_correction"], "non_federal_funding_amount": row["non_fed_funding_amount"], "original_loan_subsidy_cost": row["orig_sub_guran"], # ?? "record_type": int(h.up2colon(row['record_type'])), "sai_number": row["sai_number"], "submitted_type": "C", # ?? For CSV? } # ?? business_funds_indicator # ?? reporting period start/end?? txn_assistance = TransactionAssistance(**txn_assistance_dict) txn_assistance_list.append(txn_assistance) # Bulk insert transaction rows self.logger.info( "Starting Transaction bulk insert ({} records)".format( len(txn_list))) Transaction.objects.bulk_create(txn_list) self.logger.info("Completed Transaction bulk insert") # Update txn assistance list with newly-inserted transactions award_id_list = [] # we'll need this when updating the awards later on for idx, t in enumerate(txn_assistance_list): t.transaction = txn_list[idx] award_id_list.append(txn_list[idx].award_id) # Bulk insert transaction assistance rows self.logger.info( "Starting TransactionAssistance bulk insert ({} records)".format( len(txn_assistance_list))) TransactionAssistance.objects.bulk_create(txn_assistance_list) self.logger.info("Completed TransactionAssistance bulk insert") # Update awards to reflect latest transaction information # (note that this can't be done via signals or a save() # override in the model itself, because those aren't # triggered by a bulk update self.logger.info("Starting Awards update") count = update_awards(tuple(award_id_list)) update_contract_awards(tuple(award_id_list)) update_model_description_fields() self.logger.info("Completed Awards update ({} records)".format(count))
def test_txn_get_or_create(): """Test Transaction.get_or_create_transaction method.""" agency1 = mommy.make('references.Agency') agency2 = mommy.make('references.Agency') sub = mommy.make('submissions.SubmissionAttributes') awd1 = mommy.make('awards.Award', awarding_agency=agency1) txn1 = mommy.make('awards.Transaction', award=awd1, modification_number='1', awarding_agency=agency1, submission=sub, last_modified_date=date(2012, 7, 13)) txn1_id = txn1.id assert Transaction.objects.all().count() == 1 # record with same award but different mod number is inserted as new txn txn_dict = { 'submission': sub, 'award': awd1, 'modification_number': '2', 'awarding_agency': agency1, 'action_date': date(1999, 12, 31), # irrelevant, but required txn field 'last_modified_date': date(2012, 3, 1) } txn = Transaction.get_or_create_transaction(**txn_dict) txn.save() assert Transaction.objects.all().count() == 2 # record with same agency/mod # but different award is inserted as new txn txn_dict = { 'submission': sub, 'award': mommy.make('awards.Award'), 'modification_number': '1', 'awarding_agency': agency1, 'action_date': date(1999, 12, 31), # irrelevant, but required txn field 'last_modified_date': date(2012, 3, 1) } txn = Transaction.get_or_create_transaction(**txn_dict) txn.save() assert Transaction.objects.all().count() == 3 # record with no matching pieces of info is inserted as new txn txn_dict = { 'submission': sub, 'award': mommy.make('awards.Award'), 'modification_number': '99', 'awarding_agency': agency2, 'action_date': date(1999, 12, 31), # irrelevant, but required txn field 'last_modified_date': date(2012, 3, 1) } txn = Transaction.get_or_create_transaction(**txn_dict) txn.save() assert Transaction.objects.all().count() == 4 # if existing txn's last modified date < the incoming txn # last modified date, update the existing txn txn_dict = { 'submission': sub, 'award': awd1, 'modification_number': '1', 'awarding_agency': agency1, 'action_date': date(1999, 12, 31), # irrelevant, but required txn field 'last_modified_date': date(2013, 7, 13), 'description': 'new description' } txn = Transaction.get_or_create_transaction(**txn_dict) txn.save() # expecting an update, not an insert, so txn count should be unchanged assert Transaction.objects.all().count() == 4 assert txn.id == txn1_id assert txn.description == 'new description' assert txn.last_modified_date == date(2013, 7, 13) # if existing txn last modified date > the incoming # last modified date, do nothing txn_dict = { 'submission': sub, 'award': awd1, 'modification_number': '1', 'awarding_agency': agency1, 'action_date': date(1999, 12, 31), # irrelevant, but required txn field 'last_modified_date': date(2013, 3, 1), 'description': 'an older txn' } txn = Transaction.get_or_create_transaction(**txn_dict) txn.save() # expecting an update, not an insert, so txn count should be unchanged assert Transaction.objects.all().count() == 4 assert txn.description == 'new description' assert txn.last_modified_date == date(2013, 7, 13) # if the txn already exists and its certified date # is < the incoming certified date, update # the existing txn sub2 = mommy.make('submissions.SubmissionAttributes', certified_date=date(2015, 7, 13)) sub3 = mommy.make('submissions.SubmissionAttributes', certified_date=date(2016, 7, 13)) txn2 = mommy.make('awards.Transaction', award=awd1, modification_number='5', awarding_agency=agency1, submission=sub2) txn2_id = txn2.id txn_dict = { 'submission': sub3, # same txn attributes, but use a more recent certified date 'award': awd1, 'modification_number': '5', 'awarding_agency': agency1, 'action_date': date(1999, 12, 31), # irrelevant, but required txn field 'description': 'new description' } txn = Transaction.get_or_create_transaction(**txn_dict) txn.save() # there should now be one more txn in the table, and it should reflect # the most recent updates assert Transaction.objects.all().count() == 5 assert txn.id == txn2_id assert txn.description == 'new description' assert txn.submission.certified_date == sub3.certified_date # if the txn already exists and its certified date is > # the incoming certified date, do nothing sub4 = mommy.make('submissions.SubmissionAttributes', certified_date=date(2015, 1, 1)) txn_dict['submission'] = sub4 txn_dict['description'] = 'older description' txn = Transaction.get_or_create_transaction(**txn_dict) txn.save() # there should be no change in number of txn records assert Transaction.objects.all().count() == 5 # attributes of txn should be unchanged assert txn.id == txn2_id assert txn.description == 'new description' # if the txn already exists and its certified date is < the # incoming last modified date, update it txn_dict = { 'submission': sub, # this submission has a null certified date 'award': awd1, 'modification_number': '5', 'awarding_agency': agency1, 'action_date': date(1999, 12, 31), # irrelevant, but required txn field 'last_modified_date': date(2020, 2, 1), 'description': 'even newer description' } txn = Transaction.get_or_create_transaction(**txn_dict) txn.save() # there should be no change in number of txn records assert Transaction.objects.all().count() == 5 # txn id should be unchanged assert txn.id == txn2_id # txn attributes should be updated assert txn.description == 'even newer description' assert txn.last_modified_date == date(2020, 2, 1) assert txn.submission.certified_date is None # if the txn alread exists and its last modified date is < # the incoming certified date, update it sub5 = mommy.make('submissions.SubmissionAttributes', certified_date=date(2020, 7, 13)) txn_dict = { 'submission': sub5, 'award': awd1, 'modification_number': '5', 'awarding_agency': agency1, 'action_date': date(1999, 12, 31), # irrelevant, but required txn field 'last_modified_date': None, 'description': 'txn from the future!' } txn = Transaction.get_or_create_transaction(**txn_dict) txn.save() # there should be no change in number of txn records assert Transaction.objects.all().count() == 5 # txn id should be unchanged assert txn.id == txn2_id # txn attributes should be updated assert txn.description == 'txn from the future!' assert txn.last_modified_date is None assert txn.submission.certified_date == sub5.certified_date