def update_sheet():
    """Rebuild the 'viz burden data' worksheet with rent-burden rates for all US counties.

    Reads ``year`` and ``pword`` from the request's query string, validates both
    through ``Params``, queries the ACS 5-year endpoint of the Census API for the
    gross-rent-as-a-percentage-of-income counts (table B25070) of every county in
    every state, derives two percentage columns, and replaces the worksheet's
    contents with the resulting frame.

    Returns:
        The literal string ``'ayyyyy'``.

    Raises:
        ValueError: if ``year`` cannot be parsed as an integer.
        KeyError: if a combined state+county FIPS code is missing from
            ``fips_dict`` (assuming ``fips_dict`` is a plain mapping).
    """
    params = Params()
    census = Census()
    google = Google()
    data = request.args

    # 'year' is read first so that a missing 'year' is reported before a
    # missing 'pword'.
    raw_year = data['year']
    params.pword_validate(str(data['pword']))
    year = int(raw_year)  # validated as an int, used as text in the URL below
    params.year_validate(year)

    api = google.auth('FLASK_ENV')
    wb = google.open_workbook(api, 'FLASK_ENV')
    api_key = census.get_census_api_key()

    # Census variable code -> column label, in the order they are requested.
    requested = [
        ('B25070_010E', 'GROSS_RENT_PERCENT_INCOME_50_PLUS'),
        ('B25070_006E', 'GROSS_RENT_PERCENT_INCOME_25_30'),
        ('B25070_007E', 'GROSS_RENT_PERCENT_INCOME_30_34'),
        ('B25070_008E', 'GROSS_RENT_PERCENT_INCOME_35_39'),
        ('B25070_009E', 'GROSS_RENT_PERCENT_INCOME_40_49'),
        ('B25070_001E', 'TOTAL_POPULATION_BURDENED'),
    ]
    count_columns = [label for _, label in requested]

    # Example of the shape of the query for a single variable:
    #   https://api.census.gov/data/2018/acs/acs5?get=B25070_010E&for=county:*&in=state:*
    # which returns, for every county in every state, the count stored under
    # GROSS_RENT_PERCENT_INCOME_50_PLUS.
    query_url = ''.join([
        'https://api.census.gov/data/', str(year), '/', 'acs/acs5',
        '?get=', ','.join(code for code, _ in requested),
        '&for=', 'county:*',
        '&in=', 'state:*',
        api_key,  # appended verbatim, exactly as returned by Census
    ])

    # NOTE(review): no timeout or HTTP status check is applied; a non-JSON
    # error body surfaces as a JSON decoding error from .json().
    values = requests.get(url=query_url).json()

    df = pd.DataFrame(values)
    df.columns = count_columns + ['state fips', 'county fips']
    # The first row of the response is the API's own header row.
    df.drop([0], inplace=True)

    # Convert the count columns once; the string columns in df are left
    # untouched so the sheet receives the same raw values as before.
    counts = df[count_columns].apply(pd.to_numeric)
    total = counts['TOTAL_POPULATION_BURDENED']

    # NOTE(review): the "burdened" numerator includes the 25-30 bracket and
    # excludes the 50+ bracket (reported separately as "severely burdened");
    # confirm this matches the intended definition.
    burdened = (counts['GROSS_RENT_PERCENT_INCOME_25_30']
                + counts['GROSS_RENT_PERCENT_INCOME_30_34']
                + counts['GROSS_RENT_PERCENT_INCOME_35_39']
                + counts['GROSS_RENT_PERCENT_INCOME_40_49'])

    # Percentages rounded to 4 decimal places.
    df['PERCENT RENT BURDENED'] = ((burdened / total) * 100).round(4)
    df['PERCENT SEVERLY RENT BURDENED'] = (
        (counts['GROSS_RENT_PERCENT_INCOME_50_PLUS'] / total) * 100).round(4)

    df['fips'] = df['state fips'] + df['county fips']
    df['state'] = df['state fips'].apply(
        lambda code: us.states.lookup(code))  # state looked up via the `us` library
    df['county'] = df['fips'].apply(lambda code: fips_dict[code])

    # Rows with any missing value (e.g. a 0/0 percentage or an unknown state)
    # are removed before upload.
    df.dropna(inplace=True)

    sheet = google.worksheet_by_title_wrapper(wb, 'viz burden data')
    google.clear_wrapper(sheet)
    google.set_dataframe_wrapper(sheet, df, (1, 1))
    return 'ayyyyy'
def update_sheet():
    """Rebuild the three Oregon dashboard worksheets from the Census ACS 5-year API.

    Reads ``year`` and ``pword`` from the request's query string, validates both
    through ``Params``, then refreshes, in this order:

    * 'viz burden data' - rent-burden percentages for every Oregon county,
    * 'viz household income data' - household income bracket counts for Crook,
      Deschutes and Jefferson counties, each row scaled by its own maximum,
    * 'viz historic rent data' - yearly rent-burden percentages and median
      income for the same three counties, from 2011 through ``year``.

    Returns:
        The rendered 'dashboard.html' template.

    Raises:
        ValueError: if ``year`` cannot be parsed as an integer.
        KeyError: if the API returns a county code that is not expected.
    """
    params = Params()
    census = Census()
    google = Google()
    data = request.args

    # 'year' is read first so that a missing 'year' is reported before a
    # missing 'pword'.
    raw_year = data['year']
    params.pword_validate(str(data['pword']))
    acs_year = int(raw_year)
    params.year_validate(acs_year)

    api = google.auth('FLASK_ENV')
    wb = google.open_workbook(api, 'FLASK_ENV')
    api_key = census.get_census_api_key()

    _write_burden_sheet(google, wb, acs_year, api_key)
    _write_household_income_sheet(google, wb, acs_year, api_key)
    _write_historic_rent_sheet(google, wb, acs_year, api_key)
    return render_template('dashboard.html')


_CENSUS_API_ROOT = 'https://api.census.gov/data/'
_ACS5_DATA_SET = 'acs/acs5'
_OREGON_STATE_FIPS = '41'
_FIRST_HISTORIC_YEAR = 2011

# Census variable codes used by the helpers below.
_RENT_25_30 = 'B25070_006E'
_RENT_30_34 = 'B25070_007E'
_RENT_35_39 = 'B25070_008E'
_RENT_40_49 = 'B25070_009E'
_RENT_50_PLUS = 'B25070_010E'
_RENT_TOTAL = 'B25070_001E'
_MED_INCOME = 'B06011_001E'

# County FIPS code -> county name for every Oregon county.
_OREGON_COUNTY_NAMES = {
    "001": "Baker",
    "003": "Benton",
    "005": "Clackamas",
    "007": "Clatsop",
    "009": "Columbia",
    "011": "Coos",
    "013": "Crook",
    "015": "Curry",
    "017": "Deschutes",
    "019": "Douglas",
    "021": "Gilliam",
    "023": "Grant",
    "025": "Harney",
    "027": "Hood River",
    "029": "Jackson",
    "031": "Jefferson",
    "033": "Josephine",
    "035": "Klamath",
    "037": "Lake",
    "039": "Lane",
    "041": "Lincoln",
    "043": "Linn",
    "045": "Malheur",
    "047": "Marion",
    "049": "Morrow",
    "051": "Multnomah",
    "053": "Polk",
    "055": "Sherman",
    "057": "Tillamook",
    "059": "Umatilla",
    "061": "Union",
    "063": "Wallowa",
    "065": "Wasco",
    "067": "Washington",
    "069": "Wheeler",
    "071": "Yamhill",
}

# The three counties covered by the household income and historic sheets.
_TRI_COUNTY_NAMES = {"013": "Crook", "017": "Deschutes", "031": "Jefferson"}

# Labels for variables B19001_002E .. B19001_017E, in that order.
_HOUSEHOLD_INCOME_LABELS = [
    'Less than $10,000', '$10,000 to $14,999', '$15,000 to $19,999',
    '$20,000 to $24,999', '$25,000 to $29,999', '$30,000 to $34,999',
    '$35,000 to $39,999', '$40,000 to $44,999', '$45,000 to $49,999',
    '$50,000 to $59,999', '$60,000 to $74,999', '$75,000 to $99,999',
    '$100,000 to $124,999', '$125,000 to $149,999', '$150,000 to $199,999',
    '$200,000 or more',
]


def _oregon_county_url(year, variables, counties, api_key):
    """Build an ACS 5-year query URL for `variables` over Oregon `counties`.

    `counties` is the text placed after ``county:`` ('*' or a comma-separated
    list of county FIPS codes); `api_key` is appended verbatim.
    """
    return ''.join([
        _CENSUS_API_ROOT, str(year), '/', _ACS5_DATA_SET,
        '?get=', ','.join(variables),
        '&for=', 'county:', counties,
        '&in=', 'state:', _OREGON_STATE_FIPS,
        api_key,
    ])


def _fetch_rows(url):
    """GET `url` and return the decoded JSON body (a list of rows, header first)."""
    # NOTE(review): no timeout or HTTP status check is applied; a non-JSON
    # error body surfaces as a JSON decoding error from .json().
    return requests.get(url=url).json()


def _write_burden_sheet(google, wb, acs_year, api_key):
    """Upload rent-burden percentages for every Oregon county to 'viz burden data'."""
    count_columns = [
        'GROSS_RENT_PERCENT_INCOME_50_PLUS',
        'GROSS_RENT_PERCENT_INCOME_25_30',
        'GROSS_RENT_PERCENT_INCOME_30_34',
        'GROSS_RENT_PERCENT_INCOME_35_39',
        'GROSS_RENT_PERCENT_INCOME_40_49',
        'TOTAL_POPULATION_BURDENED',
    ]
    variables = [_RENT_50_PLUS, _RENT_25_30, _RENT_30_34, _RENT_35_39,
                 _RENT_40_49, _RENT_TOTAL]

    # Each data row looks like [count, ..., '41', '047']: the requested counts
    # followed by the state FIPS code and the county FIPS code.
    rows = _fetch_rows(_oregon_county_url(acs_year, variables, '*', api_key))
    df = pd.DataFrame(rows)
    df.columns = count_columns + ['state', 'county']
    # The first row of the response is the API's own header row.
    df = df.drop([0])

    counts = df[count_columns].apply(pd.to_numeric)
    total = counts['TOTAL_POPULATION_BURDENED']

    # NOTE(review): the "burdened" numerator includes the 25-30 bracket and
    # excludes the 50+ bracket (reported separately as "severely burdened");
    # confirm this matches the intended definition.
    burdened = (counts['GROSS_RENT_PERCENT_INCOME_25_30']
                + counts['GROSS_RENT_PERCENT_INCOME_30_34']
                + counts['GROSS_RENT_PERCENT_INCOME_35_39']
                + counts['GROSS_RENT_PERCENT_INCOME_40_49'])

    # The raw total is kept as returned by the API; the percentage columns
    # are added in the order the sheet expects.
    out = df[['TOTAL_POPULATION_BURDENED']].copy()
    out['PERCENT RENT BURDENED'] = (burdened / total) * 100
    out['PERCENT SEVERLY RENT BURDENED'] = (
        counts['GROSS_RENT_PERCENT_INCOME_50_PLUS'] / total) * 100
    out['COUNTY FIPS'] = df['county']
    out['COUNTY NAME'] = df['county'].map(_OREGON_COUNTY_NAMES)

    sheet = google.worksheet_by_title_wrapper(wb, 'viz burden data')
    google.clear_wrapper(sheet)
    google.set_dataframe_wrapper(sheet, out, (1, 1))


def _write_household_income_sheet(google, wb, acs_year, api_key):
    """Upload per-county normalized household income brackets for the tri-county area."""
    # B19001_002E .. B19001_017E: one variable per income bracket. All of
    # them are fetched in a single request instead of one request each.
    variables = ['B19001_{:03d}E'.format(n)
                 for n in range(2, 2 + len(_HOUSEHOLD_INCOME_LABELS))]
    rows = _fetch_rows(_oregon_county_url(
        acs_year, variables, ','.join(_TRI_COUNTY_NAMES), api_key))

    household_incomes = {name: [] for name in _TRI_COUNTY_NAMES.values()}
    for row in rows[1:]:  # rows[0] is the header row
        # The county FIPS code is the last field of every row.
        county_name = _OREGON_COUNTY_NAMES[row[-1]]
        household_incomes[county_name].extend(
            int(count) for count in row[:len(variables)])

    # One row per county, one column per income bracket.
    bracket_df = pd.DataFrame.from_dict(household_incomes, orient='index')
    bracket_df.columns = _HOUSEHOLD_INCOME_LABELS

    counties_df = pd.DataFrame.from_dict(_TRI_COUNTY_NAMES, orient='index')
    counties_df.columns = ['county']

    # Scale every county's row by that county's largest bracket.
    normalized_df = bracket_df.div(bracket_df.max(axis=1), axis=0)

    sheet = google.worksheet_by_title_wrapper(wb,
                                              'viz household income data')
    google.clear_wrapper(sheet)
    google.set_dataframe_wrapper(sheet, counties_df, (1, 1))
    google.set_dataframe_wrapper(sheet, normalized_df, (1, 2))


def _write_historic_rent_sheet(google, wb, acs_year, api_key):
    """Upload yearly rent-burden and median income figures for the tri-county area."""
    variables = [_RENT_TOTAL, _RENT_50_PLUS, _RENT_25_30, _RENT_30_34,
                 _RENT_35_39, _RENT_40_49, _MED_INCOME]
    counties = ','.join(_TRI_COUNTY_NAMES)

    # Records are grouped per county so the sheet lists every year of one
    # county before moving on to the next county.
    by_county = {name: [] for name in _TRI_COUNTY_NAMES.values()}
    for year in range(_FIRST_HISTORIC_YEAR, acs_year + 1):
        rows = _fetch_rows(
            _oregon_county_url(year, variables, counties, api_key))
        for row in rows[1:]:  # rows[0] is the header row
            county_name = _OREGON_COUNTY_NAMES[row[-1]]
            total = int(row[0])
            severe = int(row[1])
            burdened = (int(row[2]) + int(row[3]) + int(row[4]) + int(row[5]))
            # Every value is stored under its own column name together with
            # its county and year, so a label can never end up on the wrong
            # series: 'rent burdened' is the 25-49 brackets, 'severe rent
            # burdened' is the 50+ bracket.
            by_county[county_name].append({
                'rent burdened': 100 * burdened / total,
                'severe rent burdened': 100 * (severe / total),
                'median income': int(row[6]),
                'county': county_name,
                'year': year,
            })

    records = [record
               for name in _TRI_COUNTY_NAMES.values()
               for record in by_county[name]]
    final_df = pd.DataFrame(records, columns=[
        'rent burdened', 'severe rent burdened', 'median income', 'county',
        'year'
    ])

    sheet = google.worksheet_by_title_wrapper(wb, 'viz historic rent data')
    google.clear_wrapper(sheet)
    google.set_dataframe_wrapper(sheet, final_df, (1, 1))