def mkrecord(action_taken, agency_code, countyfp, geoid):
    """Build an ``HMDARecord`` with mostly fixed values and save it.

    ``action_taken``, ``agency_code`` and ``countyfp`` are passed straight
    to the model constructor; ``geoid`` is assigned to ``geoid_id`` on the
    instance before saving.  Nothing is returned.
    """
    fields = {
        'as_of_year': 2014,
        'respondent_id': '1111111111',
        'agency_code': agency_code,
        'loan_amount_000s': 222,
        'action_taken': action_taken,
        'statefp': '11',
        'countyfp': countyfp,
    }
    row = HMDARecord(**fields)
    row.geoid_id = geoid
    row.save()
def mkrecord(action_taken, agency_code, countyfp, geoid):
    """Create and persist one ``HMDARecord``.

    The year, respondent id, loan amount and ``statefp`` are hard-coded;
    the remaining constructor values come from the arguments.  ``geoid``
    is stored on ``geoid_id`` after construction and the record is then
    saved.  The function returns ``None``.
    """
    # Values that never vary between calls.
    fixed = dict(
        as_of_year=2014,
        respondent_id='1111111111',
        loan_amount_000s=222,
        statefp='11',
    )
    entry = HMDARecord(
        agency_code=agency_code,
        action_taken=action_taken,
        countyfp=countyfp,
        **fixed
    )
    entry.geoid_id = geoid
    entry.save()
def test_auto_fields(self):
    """``lender`` should read as the agency code followed by respondent id.

    Checked twice: on a record after ``save()``, and on an unsaved record
    after calling ``auto_fields()`` directly.
    """
    # Both records share every field except respondent_id / agency_code.
    shared = dict(
        as_of_year=2014,
        loan_type=1,
        property_type=1,
        loan_purpose=1,
        owner_occupancy=1,
        loan_amount_000s=55,
        preapproval='1',
        action_taken=1,
        msamd='01234',
        statefp='11',
        countyfp='222',
        census_tract_number='01234',
        applicant_ethnicity='1',
        co_applicant_ethnicity='1',
        applicant_race_1='1',
        co_applicant_race_1='1',
        applicant_sex='1',
        co_applicant_sex='1',
        applicant_income_000s='1000',
        purchaser_type='1',
        rate_spread='0123',
        hoepa_status='1',
        lien_status='1',
        sequence_number='1',
        population='1',
        minority_population='1',
        ffieic_median_family_income='1000',
        tract_to_msamd_income='1000',
        number_of_owner_occupied_units='1',
        number_of_1_to_4_family_units='1',
        application_date_indicator=1,
    )

    saved = HMDARecord(
        respondent_id='0123456789', agency_code='3', **shared)
    saved.geoid_id = '11222333000'
    saved.save()
    self.assertEqual(saved.lender, '30123456789')
    saved.delete()

    unsaved = HMDARecord(
        respondent_id='01-345-789', agency_code='2', **shared)
    unsaved.auto_fields()
    self.assertEqual(unsaved.lender, '201-345-789')
def mkrecord(action_taken, agency_code, countyfp, geoid):
    """Build a fully populated ``HMDARecord`` and save it.

    Only ``action_taken``, ``agency_code`` and ``countyfp`` vary per call;
    every other constructor value is a fixed literal.  ``geoid`` is
    assigned to ``geoid_id`` before the record is saved.  Returns ``None``.
    """
    # Literals that are the same for every record this helper creates.
    constants = dict(
        as_of_year=2014,
        respondent_id='1111111111',
        loan_type=1,
        property_type=1,
        loan_purpose=1,
        owner_occupancy=1,
        loan_amount_000s=222,
        preapproval='1',
        msamd='01234',
        statefp='11',
        census_tract_number='01234',
        applicant_ethnicity='1',
        co_applicant_ethnicity='1',
        applicant_race_1='1',
        co_applicant_race_1='1',
        applicant_sex='1',
        co_applicant_sex='1',
        applicant_income_000s='1000',
        purchaser_type='1',
        rate_spread='0123',
        hoepa_status='1',
        lien_status='1',
        sequence_number='1',
        population='1',
        minority_population='1',
        ffieic_median_family_income='1000',
        tract_to_msamd_income='1000',
        number_of_owner_occupied_units='1',
        number_of_1_to_4_family_units='1',
        application_date_indicator=1,
    )
    row = HMDARecord(
        agency_code=agency_code,
        action_taken=action_taken,
        countyfp=countyfp,
        **constants
    )
    row.geoid_id = geoid
    row.save()
def test_auto_fields(self):
    """``lender`` should read as the agency code followed by respondent id.

    The first record is saved (and deleted afterwards); the second is
    never saved and has ``auto_fields()`` called on it directly.
    """
    # Fields common to both records.
    shared = {
        'as_of_year': 2014,
        'loan_amount_000s': 55,
        'action_taken': 1,
    }

    saved = HMDARecord(
        respondent_id='0123456789', agency_code='3',
        statefp='11', countyfp='222', **shared)
    saved.geoid_id = '11222333000'
    saved.save()
    self.assertEqual(saved.lender, '30123456789')
    saved.delete()

    unsaved = HMDARecord(
        respondent_id='01-345-789', agency_code='2',
        statefp='ST', countyfp='COU', **shared)
    unsaved.auto_fields()
    self.assertEqual(unsaved.lender, '201-345-789')
def mkrecord_hmda(institution_id, action_taken, countyfp, geoid):
    """Save an ``HMDARecord`` linked to an existing institution and geo.

    The ``Institution`` with ``institution_id`` and the ``Geo`` with
    ``geoid`` are fetched first (``.get`` raises if either is missing).
    The institution supplies ``respondent_id`` and ``agency_code``; the
    remaining fields are fixed literals apart from ``action_taken`` and
    ``countyfp``.  Returns ``None``.
    """
    lender = Institution.objects.get(institution_id=institution_id)
    tract = Geo.objects.get(geoid=geoid)

    # Literals that are the same for every record this helper creates.
    constants = dict(
        as_of_year=2014,
        loan_type=1,
        property_type=1,
        loan_purpose=1,
        owner_occupancy=1,
        loan_amount_000s=222,
        preapproval='1',
        msamd='01234',
        statefp='11',
        census_tract_number='01234',
        applicant_ethnicity='1',
        co_applicant_ethnicity='1',
        applicant_race_1='1',
        co_applicant_race_1='1',
        applicant_sex='1',
        co_applicant_sex='1',
        applicant_income_000s='1000',
        purchaser_type='1',
        rate_spread='0123',
        hoepa_status='1',
        lien_status='1',
        sequence_number='1',
        population='1',
        minority_population='1',
        ffieic_median_family_income='1000',
        tract_to_msamd_income='1000',
        number_of_owner_occupied_units='1',
        number_of_1_to_4_family_units='1',
        application_date_indicator=1,
    )
    row = HMDARecord(
        respondent_id=lender.respondent_id,
        agency_code=lender.agency_id,
        action_taken=action_taken,
        countyfp=countyfp,
        **constants
    )
    row.geo = tract
    row.institution = lender
    row.save()
def records():
    """Yield an ``HMDARecord`` for each accepted row of the file ``args[0]``.

    A generator is required as there are too many records to instantiate
    in memory at once.  Relies on names from the enclosing scope:
    ``args``, ``self``, ``reader``, ``errors``, ``known_hmda`` and
    ``geo_states``.

    A row is yielded only when its ``statefp`` column (``row[11]``) is not
    in ``known_hmda``, is in ``geo_states``, and the computed ``geoid_id``
    does not contain ``'NA'``.

    Raises whatever ``int()`` or row indexing raise on a malformed row.
    """
    # The context manager closes the file even if a row fails to parse or
    # the consumer stops iterating before the file is exhausted.
    with open(args[0], 'r') as datafile:
        for i, row in enumerate(reader(datafile)):
            # Progress line once every million rows, starting with row 0.
            if i % 1000000 == 0:
                self.stdout.write("Record %d 000,000" % (i // 1000000))
            record = HMDARecord(
                as_of_year=int(row[0]),
                respondent_id=row[1],
                agency_code=row[2],
                loan_amount_000s=int(row[7]),
                action_taken=row[9],
                statefp=row[11],
                countyfp=row[12])
            # Concatenate columns 11-13, dropping the dot from column 13.
            censustract = row[11] + row[12] + row[13].replace('.', '')
            # Use the replacement from errors.in_2010 when one exists.
            record.geoid_id = errors.in_2010.get(censustract, censustract)
            record.auto_fields()
            if (row[11] not in known_hmda and row[11] in geo_states
                    and 'NA' not in record.geoid_id):
                yield record
def mkrecord(action_taken, agency_code, countyfp, geoid):
    """Create one fully populated ``HMDARecord`` and save it.

    ``action_taken``, ``agency_code`` and ``countyfp`` are the only
    constructor values taken from the arguments; ``geoid`` is written to
    ``geoid_id`` after construction.  Returns ``None``.
    """
    # Start from the fixed literals, then overlay the caller's values.
    values = {
        "as_of_year": 2014,
        "respondent_id": "1111111111",
        "loan_type": 1,
        "property_type": 1,
        "loan_purpose": 1,
        "owner_occupancy": 1,
        "loan_amount_000s": 222,
        "preapproval": "1",
        "msamd": "01234",
        "statefp": "11",
        "census_tract_number": "01234",
        "applicant_ethnicity": "1",
        "co_applicant_ethnicity": "1",
        "applicant_race_1": "1",
        "co_applicant_race_1": "1",
        "applicant_sex": "1",
        "co_applicant_sex": "1",
        "applicant_income_000s": "1000",
        "purchaser_type": "1",
        "rate_spread": "0123",
        "hoepa_status": "1",
        "lien_status": "1",
        "sequence_number": "1",
        "population": "1",
        "minority_population": "1",
        "ffieic_median_family_income": "1000",
        "tract_to_msamd_income": "1000",
        "number_of_owner_occupied_units": "1",
        "number_of_1_to_4_family_units": "1",
        "application_date_indicator": 1,
    }
    values["agency_code"] = agency_code
    values["action_taken"] = action_taken
    values["countyfp"] = countyfp

    entry = HMDARecord(**values)
    entry.geoid_id = geoid
    entry.save()
def records():
    """Yield an ``HMDARecord`` for each accepted row of the file ``args[0]``.

    Implemented as a generator because there are too many records to
    instantiate in memory at once.  ``args``, ``self``, ``reader``,
    ``errors``, ``known_hmda`` and ``geo_states`` are taken from the
    enclosing scope.

    A row is accepted when ``row[11]`` is absent from ``known_hmda``,
    present in ``geo_states``, and the record's ``geoid_id`` does not
    contain ``'NA'``.

    Raises whatever ``int()`` or row indexing raise on a malformed row.
    """
    # ``with`` releases the file handle on every exit path, including an
    # exception in the loop body or an early stop by the consumer.
    with open(args[0], 'r') as datafile:
        for i, row in enumerate(reader(datafile)):
            # Progress line once every million rows, starting with row 0.
            if i % 1000000 == 0:
                self.stdout.write("Record %d 000,000" % (i // 1000000))
            record = HMDARecord(as_of_year=int(row[0]),
                                respondent_id=row[1],
                                agency_code=row[2],
                                loan_amount_000s=int(row[7]),
                                action_taken=row[9],
                                statefp=row[11],
                                countyfp=row[12])
            # Columns 11-13 joined, with the dot removed from column 13.
            censustract = row[11] + row[12] + row[13].replace('.', '')
            # Fall back to the raw value when errors.in_2010 has no entry.
            record.geoid_id = errors.in_2010.get(censustract, censustract)
            record.auto_fields()
            if (row[11] not in known_hmda and row[11] in geo_states
                    and 'NA' not in record.geoid_id):
                yield record
def test_auto_fields(self):
    """Verify ``lender`` equals agency code + respondent id in two cases.

    Case one saves the record and reads ``lender`` afterwards; case two
    calls ``auto_fields()`` on a record that is never saved.
    """
    def build(respondent_id, agency_code):
        # Everything except the two arguments is identical for both cases.
        return HMDARecord(
            as_of_year=2014,
            respondent_id=respondent_id,
            agency_code=agency_code,
            loan_type=1,
            property_type=1,
            loan_purpose=1,
            owner_occupancy=1,
            loan_amount_000s=55,
            preapproval='1',
            action_taken=1,
            msamd='01234',
            statefp='11',
            countyfp='222',
            census_tract_number='01234',
            applicant_ethnicity='1',
            co_applicant_ethnicity='1',
            applicant_race_1='1',
            co_applicant_race_1='1',
            applicant_sex='1',
            co_applicant_sex='1',
            applicant_income_000s='1000',
            purchaser_type='1',
            rate_spread='0123',
            hoepa_status='1',
            lien_status='1',
            sequence_number='1',
            population='1',
            minority_population='1',
            ffieic_median_family_income='1000',
            tract_to_msamd_income='1000',
            number_of_owner_occupied_units='1',
            number_of_1_to_4_family_units='1',
            application_date_indicator=1)

    persisted = build('0123456789', '3')
    persisted.geoid_id = '11222333000'
    persisted.save()
    self.assertEqual(persisted.lender, '30123456789')
    persisted.delete()

    transient = build('01-345-789', '2')
    transient.auto_fields()
    self.assertEqual(transient.lender, '201-345-789')
def records(self, csv_file):
    """Yield an ``HMDARecord`` for each accepted row of ``csv_file``.

    A generator is required as there are too many records to instantiate
    in memory at once.  Rows that raise while being converted are logged
    and skipped, and any such failure prevents the file from being deleted
    afterwards.

    Relies on names from the enclosing scope: ``reader``, ``errors``,
    ``filter_hmda``, ``known_hmda``, ``geo_states``, ``delete_file`` and
    ``log_info``.

    Acceptance rule, keyed on the ``statefp`` column (``row[11]``):
      * ``filter_hmda`` true: not in ``known_hmda``, in ``geo_states``,
        and ``'NA'`` not in ``geo_id``.
      * ``filter_hmda`` false: in ``geo_states`` and ``'NA'`` not in
        ``geo_id``; rejected rows also update ``self.na_skipped`` or
        ``self.other_skipped`` and ``self.total_skipped``.

    Side effects: increments ``self.total_lines_read`` per converted row,
    and removes ``csv_file`` once exhausted when ``delete_file`` is true
    and no row raised.
    """
    prevent_delete = False
    i = 0  # stays 0 for an empty file so the summary log still works
    inserted_counter = 0
    skipped_counter = 0
    log_info("Processing " + csv_file)
    # ``with`` closes the handle even when the consumer abandons the
    # generator before the file is exhausted.
    with open(csv_file, 'r') as datafile:
        for i, row in enumerate(reader(datafile), 1):
            if i % 25000 == 0:
                log_info("Records Processed For File " + str(i))
            try:
                record = HMDARecord(
                    as_of_year=int(row[0]),
                    respondent_id=row[1],
                    agency_code=row[2],
                    loan_type=int(row[3]),
                    property_type=row[4],
                    loan_purpose=int(row[5]),
                    owner_occupancy=int(row[6]),
                    loan_amount_000s=int(row[7]),
                    preapproval=row[8],
                    action_taken=int(row[9]),
                    msamd=row[10],
                    statefp=row[11],
                    countyfp=row[12],
                    census_tract_number=row[13],
                    applicant_ethnicity=row[14],
                    co_applicant_ethnicity=row[15],
                    applicant_race_1=row[16],
                    applicant_race_2=row[17],
                    applicant_race_3=row[18],
                    applicant_race_4=row[19],
                    applicant_race_5=row[20],
                    co_applicant_race_1=row[21],
                    co_applicant_race_2=row[22],
                    co_applicant_race_3=row[23],
                    co_applicant_race_4=row[24],
                    co_applicant_race_5=row[25],
                    applicant_sex=int(row[26]),
                    co_applicant_sex=int(row[27]),
                    applicant_income_000s=row[28],
                    purchaser_type=row[29],
                    denial_reason_1=row[30],
                    denial_reason_2=row[31],
                    denial_reason_3=row[32],
                    rate_spread=row[33],
                    hoepa_status=row[34],
                    lien_status=row[35],
                    edit_status=row[36],
                    sequence_number=row[37],
                    population=row[38],
                    minority_population=row[39],
                    ffieic_median_family_income=row[40],
                    tract_to_msamd_income=row[41],
                    number_of_owner_occupied_units=row[42],
                    number_of_1_to_4_family_units=row[43],
                    application_date_indicator=row[44])
                # Columns 11-13 joined, with the dot removed from column 13.
                censustract = row[11] + row[12] + row[13].replace('.', '')
                # Use the replacement from errors.in_2010 when one exists.
                record.geo_id = errors.in_2010.get(censustract, censustract)
                record.institution_id = row[2] + row[1]
                self.total_lines_read += 1
                if filter_hmda:
                    if (row[11] not in known_hmda and row[11] in geo_states
                            and 'NA' not in record.geo_id):
                        inserted_counter += 1
                        yield record
                    else:
                        skipped_counter += 1
                else:
                    if row[11] in geo_states and 'NA' not in record.geo_id:
                        inserted_counter += 1
                        yield record
                    else:
                        if 'NA' in record.geo_id:
                            self.na_skipped += 1
                        else:
                            self.other_skipped += 1
                        self.total_skipped += 1
                        skipped_counter += 1
            # Not a bare ``except``: the ``yield`` above sits inside this
            # ``try``, so a bare clause would also swallow GeneratorExit
            # (breaking generator close) and KeyboardInterrupt.
            except Exception:
                prevent_delete = True
                log_info('*****************************')
                log_info("Error processing csv_file")
                log_info("Record Line Number " + str(i))
                log_info("Row: " + str(row))
                # NOTE(review): the only two-argument log_info call in this
                # block -- confirm log_info accepts a second argument.
                log_info("Unexpected error:", sys.exc_info()[0])
                # format_exc() returns the traceback text; print_exc()
                # returns None, so the log used to receive None.
                log_info(traceback.format_exc())
                log_info('*****************************')
    log_info("Finished Processing File: " + str(i))
    log_info("Records That have been yield/Inserted For File: " + str(inserted_counter))
    log_info("Records Skipped For File: " + str(skipped_counter))
    # Keep the file around for inspection if any row failed.
    if delete_file and not prevent_delete:
        os.remove(csv_file)
def records(self, csv_file):
    """Yield an ``HMDARecord`` for each accepted row of ``csv_file``.

    A generator is required as there are too many records to instantiate
    in memory at once.  Rows that raise while being converted are logged
    and skipped, and any such failure prevents the file from being deleted
    afterwards.

    Relies on names from the enclosing scope: ``reader``, ``errors``,
    ``filter_hmda``, ``known_hmda``, ``geo_states``, ``delete_file`` and
    ``log_info``.

    ``geo_id`` and ``institution_id`` are both prefixed with the record's
    ``as_of_year``.

    Acceptance rule, keyed on the ``statefp`` column (``row[11]``):
      * ``filter_hmda`` true: not in ``known_hmda``, in ``geo_states``,
        and ``"NA"`` not in ``geo_id``.
      * ``filter_hmda`` false: in ``geo_states`` and ``"NA"`` not in
        ``geo_id``; rejected rows also update ``self.na_skipped`` or
        ``self.other_skipped`` and ``self.total_skipped``.

    Side effects: increments ``self.total_lines_read`` per converted row,
    and removes ``csv_file`` once exhausted when ``delete_file`` is true
    and no row raised.
    """
    prevent_delete = False
    i = 0  # stays 0 for an empty file so the summary log still works
    inserted_counter = 0
    skipped_counter = 0
    log_info("Processing " + csv_file)
    # ``with`` closes the handle even when the consumer abandons the
    # generator before the file is exhausted.
    with open(csv_file, "r") as datafile:
        for i, row in enumerate(reader(datafile), 1):
            if i % 25000 == 0:
                log_info("Records Processed For File " + str(i))
            try:
                record = HMDARecord(
                    as_of_year=int(row[0]),
                    respondent_id=row[1],
                    agency_code=row[2],
                    loan_type=int(row[3]),
                    property_type=row[4],
                    loan_purpose=int(row[5]),
                    owner_occupancy=int(row[6]),
                    loan_amount_000s=int(row[7]),
                    preapproval=row[8],
                    action_taken=int(row[9]),
                    msamd=row[10],
                    statefp=row[11],
                    countyfp=row[12],
                    census_tract_number=row[13],
                    applicant_ethnicity=row[14],
                    co_applicant_ethnicity=row[15],
                    applicant_race_1=row[16],
                    applicant_race_2=row[17],
                    applicant_race_3=row[18],
                    applicant_race_4=row[19],
                    applicant_race_5=row[20],
                    co_applicant_race_1=row[21],
                    co_applicant_race_2=row[22],
                    co_applicant_race_3=row[23],
                    co_applicant_race_4=row[24],
                    co_applicant_race_5=row[25],
                    applicant_sex=int(row[26]),
                    co_applicant_sex=int(row[27]),
                    applicant_income_000s=row[28],
                    purchaser_type=row[29],
                    denial_reason_1=row[30],
                    denial_reason_2=row[31],
                    denial_reason_3=row[32],
                    rate_spread=row[33],
                    hoepa_status=row[34],
                    lien_status=row[35],
                    edit_status=row[36],
                    sequence_number=row[37],
                    population=row[38],
                    minority_population=row[39],
                    ffieic_median_family_income=row[40],
                    tract_to_msamd_income=row[41],
                    number_of_owner_occupied_units=row[42],
                    number_of_1_to_4_family_units=row[43],
                    application_date_indicator=row[44],
                )
                # Columns 11-13 joined, with the dot removed from column 13.
                censustract = row[11] + row[12] + row[13].replace(".", "")
                # Use the replacement from errors.in_2010 when one exists.
                censustract = errors.in_2010.get(censustract, censustract)
                record.geo_id = str(record.as_of_year) + censustract
                record.institution_id = str(record.as_of_year) + record.agency_code + record.respondent_id
                self.total_lines_read += 1
                if filter_hmda:
                    if row[11] not in known_hmda and row[11] in geo_states and "NA" not in record.geo_id:
                        inserted_counter += 1
                        yield record
                    else:
                        skipped_counter += 1
                else:
                    if row[11] in geo_states and "NA" not in record.geo_id:
                        inserted_counter += 1
                        yield record
                    else:
                        if "NA" in record.geo_id:
                            self.na_skipped += 1
                        else:
                            self.other_skipped += 1
                        self.total_skipped += 1
                        skipped_counter += 1
            # Not a bare ``except``: the ``yield`` above sits inside this
            # ``try``, so a bare clause would also swallow GeneratorExit
            # (breaking generator close) and KeyboardInterrupt.
            except Exception:
                prevent_delete = True
                log_info("*****************************")
                log_info("Error processing csv_file")
                log_info("Record Line Number " + str(i))
                log_info("Row: " + str(row))
                # NOTE(review): the only two-argument log_info call in this
                # block -- confirm log_info accepts a second argument.
                log_info("Unexpected error:", sys.exc_info()[0])
                # format_exc() returns the traceback text; print_exc()
                # returns None, so the log used to receive None.
                log_info(traceback.format_exc())
                log_info("*****************************")
    log_info("Finished Processing File: " + str(i))
    log_info("Records That have been yield/Inserted For File: " + str(inserted_counter))
    log_info("Records Skipped For File: " + str(skipped_counter))
    # Keep the file around for inspection if any row failed.
    if delete_file and not prevent_delete:
        os.remove(csv_file)
def test_auto_fields(self):
    """Verify ``lender`` equals agency code + respondent id in two cases.

    Case one reads ``lender`` after ``save()`` and then deletes the
    record; case two calls ``auto_fields()`` on an unsaved record.
    """
    def build(respondent_id, agency_code, statefp, countyfp):
        # Year, loan amount and action are the same in both cases.
        return HMDARecord(
            as_of_year=2014,
            respondent_id=respondent_id,
            agency_code=agency_code,
            loan_amount_000s=55,
            action_taken=1,
            statefp=statefp,
            countyfp=countyfp)

    persisted = build('0123456789', '3', '11', '222')
    persisted.geoid_id = '11222333000'
    persisted.save()
    self.assertEqual(persisted.lender, '30123456789')
    persisted.delete()

    transient = build('01-345-789', '2', 'ST', 'COU')
    transient.auto_fields()
    self.assertEqual(transient.lender, '201-345-789')
def records():
    """Yield a fully populated ``HMDARecord`` per accepted row of ``args[0]``.

    A generator is required as there are too many records to instantiate
    in memory at once.  ``args``, ``self``, ``reader``, ``errors``,
    ``known_hmda`` and ``geo_states`` come from the enclosing scope.

    Columns 0-44 of each row are mapped onto model fields; columns 0, 3,
    5, 6, 7, 9, 26 and 27 are converted with ``int()``.  A row is yielded
    only when ``row[11]`` is not in ``known_hmda``, is in ``geo_states``,
    and the record's ``geoid_id`` does not contain ``'NA'``.

    Raises whatever ``int()`` or row indexing raise on a malformed row.
    """
    # The context manager closes the file even if a row fails to parse or
    # the consumer stops iterating before the file is exhausted.
    with open(args[0], 'r') as datafile:
        for i, row in enumerate(reader(datafile)):
            # Progress line once every million rows, starting with row 0.
            if i % 1000000 == 0:
                self.stdout.write("Record %d 000,000" % (i // 1000000))
            record = HMDARecord(
                as_of_year=int(row[0]),
                respondent_id=row[1],
                agency_code=row[2],
                loan_type=int(row[3]),
                property_type=row[4],
                loan_purpose=int(row[5]),
                owner_occupancy=int(row[6]),
                loan_amount_000s=int(row[7]),
                preapproval=row[8],
                action_taken=int(row[9]),
                msamd=row[10],
                statefp=row[11],
                countyfp=row[12],
                census_tract_number=row[13],
                applicant_ethnicity=row[14],
                co_applicant_ethnicity=row[15],
                applicant_race_1=row[16],
                applicant_race_2=row[17],
                applicant_race_3=row[18],
                applicant_race_4=row[19],
                applicant_race_5=row[20],
                co_applicant_race_1=row[21],
                co_applicant_race_2=row[22],
                co_applicant_race_3=row[23],
                co_applicant_race_4=row[24],
                co_applicant_race_5=row[25],
                applicant_sex=int(row[26]),
                co_applicant_sex=int(row[27]),
                applicant_income_000s=row[28],
                purchaser_type=row[29],
                denial_reason_1=row[30],
                denial_reason_2=row[31],
                denial_reason_3=row[32],
                rate_spread=row[33],
                hoepa_status=row[34],
                lien_status=row[35],
                edit_status=row[36],
                sequence_number=row[37],
                population=row[38],
                minority_population=row[39],
                ffieic_median_family_income=row[40],
                tract_to_msamd_income=row[41],
                number_of_owner_occupied_units=row[42],
                number_of_1_to_4_family_units=row[43],
                application_date_indicator=row[44])
            # Columns 11-13 joined, with the dot removed from column 13.
            censustract = row[11] + row[12] + row[13].replace('.', '')
            # Use the replacement from errors.in_2010 when one exists.
            record.geoid_id = errors.in_2010.get(censustract, censustract)
            record.auto_fields()
            if (row[11] not in known_hmda and row[11] in geo_states
                    and 'NA' not in record.geoid_id):
                yield record