def test_auto_fields(self):
    """Verify that a hand-assigned institution_id is what the record reports.

    The check is made twice with identical field values: once on a record
    that has been saved (and is deleted afterwards), and once on a record
    that is only constructed in memory.
    """
    # Both records are built from exactly the same keyword arguments.
    field_values = dict(
        as_of_year=2015,
        respondent_id='22-333',
        agency_code='9',
        loan_type=1,
        property_type=1,
        loan_purpose=1,
        owner_occupancy=1,
        loan_amount_000s=55,
        preapproval='1',
        action_taken=1,
        msamd='01234',
        statefp='11',
        countyfp='222',
        census_tract_number='01234',
        applicant_ethnicity='1',
        co_applicant_ethnicity='1',
        applicant_race_1='1',
        co_applicant_race_1='1',
        applicant_sex='1',
        co_applicant_sex='1',
        applicant_income_000s='1000',
        purchaser_type='1',
        rate_spread='0123',
        hoepa_status='1',
        lien_status='1',
        sequence_number='1',
        population='1',
        minority_population='1',
        ffieic_median_family_income='1000',
        tract_to_msamd_income='1000',
        number_of_owner_occupied_units='1',
        number_of_1_to_4_family_units='1',
        application_date_indicator=1,
    )

    # Saved record: the value must still be there after save().
    saved = HMDARecord(**field_values)
    saved.geo_id = '11222333000'
    saved.institution_id = '922-333'
    saved.save()
    self.assertEqual(saved.institution_id, '922-333')
    saved.delete()

    # Unsaved record: same assignment, no database round trip.
    unsaved = HMDARecord(**field_values)
    unsaved.geo_id = '11222333000'
    unsaved.institution_id = '922-333'
    self.assertEqual(unsaved.institution_id, '922-333')
def mkrecord(action_taken, countyfp, geoid):
    """Create and save one HMDARecord tied to institution "922-333".

    Only ``action_taken``, ``countyfp`` and ``geoid`` vary between calls;
    the respondent/agency identifiers are copied from the looked-up
    Institution and every other field is a fixed placeholder value.
    """
    institution = Institution.objects.get(institution_id="922-333")

    field_values = dict(
        as_of_year=2014,
        respondent_id=institution.respondent_id,
        agency_code=institution.agency_id,
        loan_type=1,
        property_type=1,
        loan_purpose=1,
        owner_occupancy=1,
        loan_amount_000s=222,
        preapproval='1',
        action_taken=action_taken,
        msamd='01234',
        statefp='11',
        countyfp=countyfp,
        census_tract_number='01234',
        applicant_ethnicity='1',
        co_applicant_ethnicity='1',
        applicant_race_1='1',
        co_applicant_race_1='1',
        applicant_sex='1',
        co_applicant_sex='1',
        applicant_income_000s='1000',
        purchaser_type='1',
        rate_spread='0123',
        hoepa_status='1',
        lien_status='1',
        sequence_number='1',
        population='1',
        minority_population='1',
        ffieic_median_family_income='1000',
        tract_to_msamd_income='1000',
        number_of_owner_occupied_units='1',
        number_of_1_to_4_family_units='1',
        application_date_indicator=1,
    )
    hmda = HMDARecord(**field_values)

    # NOTE(review): other code in this file assigns ``record.geo_id``; this
    # helper sets ``geo_geoid`` instead. Confirm against the HMDARecord
    # model which attribute name is intended.
    hmda.geo_geoid = geoid
    hmda.institution_id = institution.institution_id
    hmda.save()
def records(self, csv_file):
    """Yield one unsaved HMDARecord per acceptable row of ``csv_file``.

    This is a generator so that the whole file never has to be held in
    memory at once.

    For every row a record is built from columns 0-44, ``geo_id`` is set
    to the concatenation of columns 11, 12 and 13 (dots removed from
    column 13) after being passed through the ``errors.in_2010`` lookup,
    and ``institution_id`` is set to column 2 followed by column 1.

    A row is yielded only when column 11 is in ``geo_states`` and the
    geo id does not contain ``'NA'``; when ``filter_hmda`` is truthy,
    column 11 must additionally not be in ``known_hmda``. The
    ``self.na_skipped`` / ``self.other_skipped`` / ``self.total_skipped``
    tallies are only updated on the unfiltered path.

    A row that raises while being processed is logged and skipped, and
    the file is then kept on disk even if ``delete_file`` is set.

    ``filter_hmda``, ``known_hmda``, ``geo_states``, ``delete_file``,
    ``errors``, ``log_info`` and ``reader`` are names defined outside
    this function.
    """
    prevent_delete = False
    i = 0  # stays 0 for an empty file; used in the summary log below
    inserted_counter = 0
    skipped_counter = 0
    log_info("Processing " + csv_file)

    # The context manager closes the file even when the consumer stops
    # iterating early and the generator is closed before exhaustion.
    with open(csv_file, 'r') as datafile:
        for i, row in enumerate(reader(datafile), 1):
            if i % 25000 == 0:
                log_info("Records Processed For File " + str(i))
            try:
                record = HMDARecord(
                    as_of_year=int(row[0]),
                    respondent_id=row[1],
                    agency_code=row[2],
                    loan_type=int(row[3]),
                    property_type=row[4],
                    loan_purpose=int(row[5]),
                    owner_occupancy=int(row[6]),
                    loan_amount_000s=int(row[7]),
                    preapproval=row[8],
                    action_taken=int(row[9]),
                    msamd=row[10],
                    statefp=row[11],
                    countyfp=row[12],
                    census_tract_number=row[13],
                    applicant_ethnicity=row[14],
                    co_applicant_ethnicity=row[15],
                    applicant_race_1=row[16],
                    applicant_race_2=row[17],
                    applicant_race_3=row[18],
                    applicant_race_4=row[19],
                    applicant_race_5=row[20],
                    co_applicant_race_1=row[21],
                    co_applicant_race_2=row[22],
                    co_applicant_race_3=row[23],
                    co_applicant_race_4=row[24],
                    co_applicant_race_5=row[25],
                    applicant_sex=int(row[26]),
                    co_applicant_sex=int(row[27]),
                    applicant_income_000s=row[28],
                    purchaser_type=row[29],
                    denial_reason_1=row[30],
                    denial_reason_2=row[31],
                    denial_reason_3=row[32],
                    rate_spread=row[33],
                    hoepa_status=row[34],
                    lien_status=row[35],
                    edit_status=row[36],
                    sequence_number=row[37],
                    population=row[38],
                    minority_population=row[39],
                    ffieic_median_family_income=row[40],
                    tract_to_msamd_income=row[41],
                    number_of_owner_occupied_units=row[42],
                    number_of_1_to_4_family_units=row[43],
                    application_date_indicator=row[44])

                censustract = row[11] + row[12] + row[13].replace('.', '')
                record.geo_id = errors.in_2010.get(censustract, censustract)
                record.institution_id = row[2] + row[1]
                self.total_lines_read = self.total_lines_read + 1

                if filter_hmda:
                    if (row[11] not in known_hmda
                            and row[11] in geo_states
                            and 'NA' not in record.geo_id):
                        inserted_counter += 1
                        yield record
                    else:
                        skipped_counter += 1
                else:
                    if row[11] in geo_states and 'NA' not in record.geo_id:
                        inserted_counter += 1
                        yield record
                    else:
                        if 'NA' in record.geo_id:
                            self.na_skipped = self.na_skipped + 1
                        else:
                            self.other_skipped = self.other_skipped + 1
                        self.total_skipped = self.total_skipped + 1
                        skipped_counter += 1
            except Exception:
                # Deliberately not a bare ``except``: the ``yield`` above
                # sits inside this ``try``, so GeneratorExit (raised when
                # the generator is closed early) and KeyboardInterrupt
                # must be allowed to propagate instead of being logged
                # as a bad row.
                prevent_delete = True
                log_info('*****************************')
                log_info("Error processing csv_file")
                log_info("Record Line Number " + str(i))
                log_info("Row: " + str(row))
                log_info("Unexpected error:", sys.exc_info()[0])
                # format_exc() returns the traceback text; print_exc()
                # returns None, which is what used to be logged here.
                log_info(traceback.format_exc())
                log_info('*****************************')

    log_info("Finished Processing File: " + str(i))
    log_info("Records That have been yield/Inserted For File: " + str(inserted_counter))
    log_info("Records Skipped For File: " + str(skipped_counter))

    # Keep the source file whenever any row failed, so it can be inspected.
    if delete_file and not prevent_delete:
        os.remove(csv_file)
def records(self, csv_file):
    """Yield one unsaved HMDARecord per acceptable row of ``csv_file``.

    This is a generator so that the whole file never has to be held in
    memory at once.

    For every row a record is built from columns 0-44. The census tract
    key is the concatenation of columns 11, 12 and 13 (dots removed from
    column 13), passed through the ``errors.in_2010`` lookup. ``geo_id``
    is the record's year followed by that key, and ``institution_id`` is
    the record's year followed by its agency code and respondent id.

    A row is yielded only when column 11 is in ``geo_states`` and the
    geo id does not contain ``"NA"``; when ``filter_hmda`` is truthy,
    column 11 must additionally not be in ``known_hmda``. The
    ``self.na_skipped`` / ``self.other_skipped`` / ``self.total_skipped``
    tallies are only updated on the unfiltered path.

    A row that raises while being processed is logged and skipped, and
    the file is then kept on disk even if ``delete_file`` is set.

    ``filter_hmda``, ``known_hmda``, ``geo_states``, ``delete_file``,
    ``errors``, ``log_info`` and ``reader`` are names defined outside
    this function.
    """
    prevent_delete = False
    i = 0  # stays 0 for an empty file; used in the summary log below
    inserted_counter = 0
    skipped_counter = 0
    log_info("Processing " + csv_file)

    # The context manager closes the file even when the consumer stops
    # iterating early and the generator is closed before exhaustion.
    with open(csv_file, "r") as datafile:
        for i, row in enumerate(reader(datafile), 1):
            if i % 25000 == 0:
                log_info("Records Processed For File " + str(i))
            try:
                record = HMDARecord(
                    as_of_year=int(row[0]),
                    respondent_id=row[1],
                    agency_code=row[2],
                    loan_type=int(row[3]),
                    property_type=row[4],
                    loan_purpose=int(row[5]),
                    owner_occupancy=int(row[6]),
                    loan_amount_000s=int(row[7]),
                    preapproval=row[8],
                    action_taken=int(row[9]),
                    msamd=row[10],
                    statefp=row[11],
                    countyfp=row[12],
                    census_tract_number=row[13],
                    applicant_ethnicity=row[14],
                    co_applicant_ethnicity=row[15],
                    applicant_race_1=row[16],
                    applicant_race_2=row[17],
                    applicant_race_3=row[18],
                    applicant_race_4=row[19],
                    applicant_race_5=row[20],
                    co_applicant_race_1=row[21],
                    co_applicant_race_2=row[22],
                    co_applicant_race_3=row[23],
                    co_applicant_race_4=row[24],
                    co_applicant_race_5=row[25],
                    applicant_sex=int(row[26]),
                    co_applicant_sex=int(row[27]),
                    applicant_income_000s=row[28],
                    purchaser_type=row[29],
                    denial_reason_1=row[30],
                    denial_reason_2=row[31],
                    denial_reason_3=row[32],
                    rate_spread=row[33],
                    hoepa_status=row[34],
                    lien_status=row[35],
                    edit_status=row[36],
                    sequence_number=row[37],
                    population=row[38],
                    minority_population=row[39],
                    ffieic_median_family_income=row[40],
                    tract_to_msamd_income=row[41],
                    number_of_owner_occupied_units=row[42],
                    number_of_1_to_4_family_units=row[43],
                    application_date_indicator=row[44],
                )

                censustract = row[11] + row[12] + row[13].replace(".", "")
                censustract = errors.in_2010.get(censustract, censustract)
                # Both identifiers carry the record's year as a prefix.
                record.geo_id = str(record.as_of_year) + censustract
                record.institution_id = (
                    str(record.as_of_year) + record.agency_code + record.respondent_id
                )
                self.total_lines_read = self.total_lines_read + 1

                if filter_hmda:
                    if (
                        row[11] not in known_hmda
                        and row[11] in geo_states
                        and "NA" not in record.geo_id
                    ):
                        inserted_counter += 1
                        yield record
                    else:
                        skipped_counter += 1
                else:
                    if row[11] in geo_states and "NA" not in record.geo_id:
                        inserted_counter += 1
                        yield record
                    else:
                        if "NA" in record.geo_id:
                            self.na_skipped = self.na_skipped + 1
                        else:
                            self.other_skipped = self.other_skipped + 1
                        self.total_skipped = self.total_skipped + 1
                        skipped_counter += 1
            except Exception:
                # Deliberately not a bare ``except``: the ``yield`` above
                # sits inside this ``try``, so GeneratorExit (raised when
                # the generator is closed early) and KeyboardInterrupt
                # must be allowed to propagate instead of being logged
                # as a bad row.
                prevent_delete = True
                log_info("*****************************")
                log_info("Error processing csv_file")
                log_info("Record Line Number " + str(i))
                log_info("Row: " + str(row))
                log_info("Unexpected error:", sys.exc_info()[0])
                # format_exc() returns the traceback text; print_exc()
                # returns None, which is what used to be logged here.
                log_info(traceback.format_exc())
                log_info("*****************************")

    log_info("Finished Processing File: " + str(i))
    log_info("Records That have been yield/Inserted For File: " + str(inserted_counter))
    log_info("Records Skipped For File: " + str(skipped_counter))

    # Keep the source file whenever any row failed, so it can be inspected.
    if delete_file and not prevent_delete:
        os.remove(csv_file)
def test_auto_fields(self):
    """Assert that institution_id keeps the value assigned to it.

    Two identical records are exercised: the first is saved, checked and
    then deleted; the second is checked without ever being saved.
    """

    def build_record():
        # Fresh, unsaved record with the fixed field values used by this
        # test and the geo/institution ids assigned by hand.
        fresh = HMDARecord(
            as_of_year=2015,
            respondent_id='22-333',
            agency_code='9',
            loan_type=1,
            property_type=1,
            loan_purpose=1,
            owner_occupancy=1,
            loan_amount_000s=55,
            preapproval='1',
            action_taken=1,
            msamd='01234',
            statefp='11',
            countyfp='222',
            census_tract_number='01234',
            applicant_ethnicity='1',
            co_applicant_ethnicity='1',
            applicant_race_1='1',
            co_applicant_race_1='1',
            applicant_sex='1',
            co_applicant_sex='1',
            applicant_income_000s='1000',
            purchaser_type='1',
            rate_spread='0123',
            hoepa_status='1',
            lien_status='1',
            sequence_number='1',
            population='1',
            minority_population='1',
            ffieic_median_family_income='1000',
            tract_to_msamd_income='1000',
            number_of_owner_occupied_units='1',
            number_of_1_to_4_family_units='1',
            application_date_indicator=1)
        fresh.geo_id = '11222333000'
        fresh.institution_id = '922-333'
        return fresh

    # First pass: value must be intact after the record is saved.
    persisted = build_record()
    persisted.save()
    self.assertEqual(persisted.institution_id, '922-333')
    persisted.delete()

    # Second pass: same check on an instance that is never saved.
    in_memory = build_record()
    self.assertEqual(in_memory.institution_id, '922-333')