def get_population(self, code, year, given_population):
    """Scale ``given_population`` by a county's population change since 2010.

    The first five characters of ``code`` are used as the key into
    ``self.counties``. The entry found there is indexed by ``year`` and by
    ``'2010'``, and the ratio of those two values is applied to
    ``given_population``.

    Args:
        code: Identifier whose first five characters select the county.
        year: Key of the population figure to compare against 2010.
        given_population: Population value to be rescaled.

    Returns:
        ``float(given_population)`` multiplied by the year/2010 ratio. When
        either lookup raises ``KeyError`` the issue is recorded in the issue
        log and ``given_population`` is returned unchanged (not converted).
    """
    county_key = code[:5]
    try:
        current = float(self.counties[county_key][year])
        baseline = float(self.counties[county_key]['2010'])
        growth = (current / baseline)
        return float(given_population) * growth
    except KeyError:
        # set up personal logger
        issue_logger = Logger()
        working_dir = os.getcwd()
        issue_log_path = os.path.join(working_dir, 'files/issues.log')
        issue_logger.define_issue_log(issue_log_path)
        issue_logger.record_issue('No population data for county:', code)
        return given_population
import csv
import os
import random
import statistics

import numpy
import pandas
from sklearn import linear_model
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

from Utilities import Logger

# set up personal logger
logger = Logger()
current_path = os.getcwd()
logger.define_issue_log(os.path.join(current_path, 'files/issues.log'))
logger.log('(b = bankruptcy or bankruptcies)')


def partition(data_set, training_set_portion):
    """Shuffle ``data_set`` in place and split it into train and test parts.

    Args:
        data_set: A sliceable collection that supports ``len()``. If it has
            its own ``shuffle()`` method that method is used; otherwise the
            collection is shuffled with ``random.shuffle`` (which requires a
            mutable sequence such as a list).
        training_set_portion: Fraction of the data, between 0 and 1
            inclusive, that goes into the training set.

    Returns:
        A dictionary with the keys ``'train'`` and ``'test'`` holding the
        leading and trailing slices of the shuffled data.

    Raises:
        ValueError: If ``training_set_portion`` is outside ``[0, 1]``.
    """
    if not 0 <= training_set_portion <= 1:
        raise ValueError('training_set_portion must be between 0 and 1')

    # Prefer the collection's own shuffle() so custom containers keep
    # working; plain lists have no such method and use random.shuffle.
    own_shuffle = getattr(data_set, 'shuffle', None)
    if callable(own_shuffle):
        own_shuffle()
    else:
        random.shuffle(data_set)

    # The training size is a share of the data. Dividing by
    # (1 - portion), as was done before, always produced a size of at
    # least len(data_set) and therefore an empty test set.
    size_of_training_set = int(round(len(data_set) * training_set_portion))
    training_set = data_set[:size_of_training_set]
    test_set = data_set[size_of_training_set:]
    return {'train': training_set, 'test': test_set}


# def learn(training_inputs, training_outputs, test_inputs, test_outputs):
# Load the PUMA output table; all_X holds the listed columns and all_y the
# 'Bankruptcy' column (presumably model inputs and target -- confirm).
df = pandas.read_csv("./files/puma-output.csv")
all_X = df[[
    'Divorce', 'Age', 'Education', 'Insurance', 'Black', 'Disabled',
    'Veteran', 'Immigrant', 'Unemployed'
]]
all_y = df['Bankruptcy']
def get_acs_person_data(filepath, year):
    """Build ``Person`` objects from the rows of an ACS person CSV file.

    The first row of the file is treated as the header and must contain the
    columns SERIALNO, ST, PUMA, AGEP, SCHL, MAR, HICOV, RACBLK, DIS, MIL,
    WAOB and NWAB. Every following row produces one ``Person``.

    Args:
        filepath: Path of the CSV file to read.
        year: Year label as a string; it is appended to each person's PUMA
            code and to each dictionary key.

    Returns:
        A dictionary mapping ``SERIALNO + ST + year`` to the ``Person``
        built from that row. A file without a header row yields an empty
        dictionary.

    Raises:
        ValueError: If a required column is missing from the header.
    """
    # set up personal logger
    logger = Logger()
    current_path = os.getcwd()
    logger.define_issue_log(os.path.join(current_path, 'files/issues.log'))

    required_columns = (
        'SERIALNO',
        'ST',
        'PUMA',
        'AGEP',
        'SCHL',
        'MAR',
        'HICOV',
        'RACBLK',  # 0 = not black; 1 = black
        'DIS',  # 1 = disabled; 2 = not disabled
        'MIL',  # 1-3 = veteran
        'WAOB',  # 1-2 = non-immigrant
        'NWAB',  # 1 = temp work absence; 2 = no
    )
    # NWAB codes with a definite answer; any other code is stored as 'NA'.
    unemployed_by_nwab = {'1': True, '2': False, 'b': False}

    dictionary_of_people = dict()
    # newline='' lets the csv module handle line endings itself.
    with open(filepath, newline='') as csvfile:
        file_reader = csv.reader(csvfile)
        header = next(file_reader, None)
        if header is not None:
            # Resolve every column position once, from the header row.
            column = {name: header.index(name) for name in required_columns}

            for acs_row in file_reader:
                serial_number = acs_row[column['SERIALNO']]
                person = Person(serial_number)

                state = acs_row[column['ST']]
                person.state = state
                # TODO make an enum of state names and use it here
                person.puma = state + acs_row[column['PUMA']] + year
                person.age = acs_row[column['AGEP']]
                person.education = acs_row[column['SCHL']]

                person.divorced = acs_row[column['MAR']] == '3'
                person.insured = acs_row[column['HICOV']] == '1'
                person.black = acs_row[column['RACBLK']] == '1'
                person.disabled = acs_row[column['DIS']] == '1'
                person.veteran = acs_row[column['MIL']] in ('1', '2', '3')
                person.immigrant = acs_row[column['WAOB']] not in ('1', '2')
                person.unemployed = unemployed_by_nwab.get(
                    acs_row[column['NWAB']], 'NA')

                # NOTE(review): rows sharing SERIALNO, ST and year map to
                # the same key, so a later row replaces an earlier one --
                # confirm this is intended.
                person_key = serial_number + state + year
                dictionary_of_people[person_key] = person

    logger.log('Created', format(len(dictionary_of_people), ',d'),
               'people from', filepath)
    return dictionary_of_people