Exemple #1
0
 def get_population(self, code, year, given_population):
     """Scale ``given_population`` by the county's population change since 2010.

     The result is
     ``given_population * (population[year] / population['2010'])``,
     using the figures stored in ``self.counties``.

     Args:
         code: Identifier whose first five characters key ``self.counties``
             (the remainder is presumably a year suffix -- TODO confirm).
         year: Key of the target year inside the county's entry.
         given_population: Number (or numeric string) to scale; presumably
             a 2010-based count -- TODO confirm.

     Returns:
         The scaled population as a float. When the county or year is
         missing, or the 2010 figure is zero, the problem is recorded in
         the issue log and ``given_population`` is returned unchanged
         (i.e. not converted to float).
     """
     county_key = code[0:5]
     try:
         county = self.counties[county_key]
         pop_this_year = float(county[year])
         pop_2010 = float(county['2010'])
         ratio = pop_this_year / pop_2010
     except (KeyError, ZeroDivisionError) as error:
         # set up personal logger
         logger = Logger()
         logger.define_issue_log(
             os.path.join(os.getcwd(), 'files/issues.log'))
         if isinstance(error, KeyError):
             logger.record_issue('No population data for county:', code)
         else:
             # A zero baseline used to escape as an unhandled
             # ZeroDivisionError; treat it like missing data instead.
             logger.record_issue('Zero 2010 population for county:', code)
         return given_population
     return float(given_population) * ratio
Exemple #2
0
import numpy, pandas
import statistics, csv, os
from sklearn.metrics import r2_score
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from Utilities import Logger

# Set up the project logger (Logger comes from the local Utilities module).
# This runs at module level, so the issue-log path is built from the working
# directory of the process, not from this file's location.
logger = Logger()
current_path = os.getcwd()
logger.define_issue_log(os.path.join(current_path, 'files/issues.log'))

# Log a legend for the abbreviation "b"; presumably later log messages use
# it -- TODO confirm.
logger.log('(b = bankruptcy or bankruptcies)')


def partition(data_set, training_set_portion):
    """Shuffle ``data_set`` and split it into training and test subsets.

    Args:
        data_set: Sized, sliceable collection exposing a ``shuffle()``
            method. The method's return value is ignored, so it is expected
            to reorder the collection in place.
        training_set_portion: Fraction of the items, from 0 to 1 inclusive,
            that goes to the training subset.

    Returns:
        A dict with the first ``round(len(data_set) * training_set_portion)``
        shuffled items under ``'train'`` and the remaining items under
        ``'test'``.

    Raises:
        ValueError: If ``training_set_portion`` lies outside ``[0, 1]``.
    """
    if not 0 <= training_set_portion <= 1:
        raise ValueError(
            'training_set_portion must be between 0 and 1, got {!r}'.format(
                training_set_portion))

    data_set.shuffle()

    # The size used to be len / (1 - portion), which never equals the
    # requested fraction, oversized the training slice so the test set came
    # out empty for typical portions, and divided by zero for a portion of 1.
    size_of_training_set = int(round(len(data_set) * training_set_portion))

    return {
        'train': data_set[:size_of_training_set],
        'test': data_set[size_of_training_set:],
    }


# NOTE(review): the `learn` wrapper below is commented out, so the statements
# that follow execute at module level.
# def learn(training_inputs, training_outputs, test_inputs, test_outputs):
# Load the data set; the path is relative to the current working directory.
df = pandas.read_csv("./files/puma-output.csv")
# Select the nine named columns as a DataFrame (presumably the model inputs,
# going by the all_X / all_y naming -- TODO confirm).
all_X = df[[
    'Divorce', 'Age', 'Education', 'Insurance', 'Black', 'Disabled', 'Veteran',
    'Immigrant', 'Unemployed'
]]
# The 'Bankruptcy' column on its own (presumably the prediction target).
all_y = df['Bankruptcy']
def get_acs_person_data(filepath, year):
    """Build ``Person`` objects from an ACS person-level CSV file.

    The first row of the file is treated as the header. The columns that are
    needed are located by name, so their position in the file does not
    matter.

    Args:
        filepath: Path of the CSV file to read.
        year: Year label appended to every PUMA code and dictionary key. It
            is concatenated with strings, so it must be a ``str``.

    Returns:
        A dict mapping ``SERIALNO + ST + year`` to the ``Person`` built from
        that row. A later row with the same key replaces the earlier one.

    Raises:
        ValueError: If the file is empty or the header lacks one of the
            required columns.
    """
    # set up personal logger
    logger = Logger()
    logger.define_issue_log(os.path.join(os.getcwd(), 'files/issues.log'))

    required_columns = (
        'SERIALNO',
        'ST',
        'PUMA',
        'AGEP',
        'SCHL',
        'MAR',
        'HICOV',
        'RACBLK',  # 0 = not black; 1 = black
        'DIS',  # 1 = disabled; 2 = not disabled
        'MIL',  # 1-3 = veteran
        'WAOB',  # 1-2 = non-immigrant
        'NWAB',  # 1 = temp work absence; 2 = no
    )

    dictionary_of_people = dict()

    # The csv module asks for files opened with newline='' so that newlines
    # inside quoted fields are handled by the reader itself.
    with open(filepath, newline='') as csvfile:
        file_reader = csv.reader(csvfile)

        header = next(file_reader, None)
        if header is None:
            # An empty file used to leak a bare StopIteration to the caller.
            raise ValueError(
                '{} is empty; expected a CSV header row'.format(filepath))

        # list.index raises ValueError for a column missing from the header.
        column = {name: header.index(name) for name in required_columns}

        for acs_row in file_reader:
            serial_number = acs_row[column['SERIALNO']]
            person = Person(serial_number)

            state = acs_row[column['ST']]
            person.state = state  # TODO make an enum of state names and use it here
            person.puma = state + acs_row[column['PUMA']] + year
            person.age = acs_row[column['AGEP']]
            person.education = acs_row[column['SCHL']]

            # Flags are derived from the raw code strings in the file.
            person.divorced = acs_row[column['MAR']] == '3'
            person.insured = acs_row[column['HICOV']] == '1'
            person.black = acs_row[column['RACBLK']] == '1'
            person.disabled = acs_row[column['DIS']] == '1'
            person.veteran = acs_row[column['MIL']] in ('1', '2', '3')
            person.immigrant = acs_row[column['WAOB']] not in ('1', '2')

            # Three-valued: any code other than 1, 2 or b is kept as 'NA'.
            nwab = acs_row[column['NWAB']]
            if nwab == '1':
                person.unemployed = True
            elif nwab in ('2', 'b'):
                person.unemployed = False
            else:
                person.unemployed = 'NA'

            person_key = serial_number + state + year
            dictionary_of_people[person_key] = person

    logger.log('Created', format(len(dictionary_of_people), ',d'),
               'people from', filepath)
    return dictionary_of_people