Esempio n. 1
0
def update_sheet():
    """Rebuild the 'viz burden data' worksheet from ACS 5-year rent data.

    Reads ``year`` and ``pword`` from ``request.args``, validates both via
    ``Params``, downloads gross-rent-as-percent-of-income counts (table
    B25070) for every county in every state for that year, derives the
    rent-burden percentages and overwrites the worksheet with the result.

    Returns:
        The literal string ``'ayyyyy'`` once the sheet has been written.

    Raises:
        requests.HTTPError: if the Census API answers with an error status.
        requests.Timeout: if the Census API does not answer in time.
        KeyError: if a combined state+county FIPS code is missing from
            ``fips_dict``.
    """
    params = Params()
    census = Census()
    google = Google()

    query = request.args
    # Look up 'year' before validating the password so a missing parameter
    # is reported in the same order as before.
    raw_year = query['year']
    params.pword_validate(str(query['pword']))
    acs_year = int(raw_year)  # validated as an int, used as text in the URL
    params.year_validate(acs_year)

    api = google.auth('FLASK_ENV')
    wb = google.open_workbook(api, 'FLASK_ENV')

    # NOTE(review): the key is concatenated straight onto the URL, so
    # get_census_api_key() presumably returns a ready-made '&key=...'
    # fragment -- confirm against the Census helper.
    api_key = census.get_census_api_key()

    # (ACS variable, DataFrame column) pairs, in the order they are requested.
    # The API response lists them in this order followed by state and county.
    fields = [
        ('B25070_010E', 'GROSS_RENT_PERCENT_INCOME_50_PLUS'),
        ('B25070_006E', 'GROSS_RENT_PERCENT_INCOME_25_30'),
        ('B25070_007E', 'GROSS_RENT_PERCENT_INCOME_30_34'),
        ('B25070_008E', 'GROSS_RENT_PERCENT_INCOME_35_39'),
        ('B25070_009E', 'GROSS_RENT_PERCENT_INCOME_40_49'),
        ('B25070_001E', 'TOTAL_POPULATION_BURDENED'),
    ]
    count_columns = [column for _, column in fields]

    # e.g. https://api.census.gov/data/2018/acs/acs5
    #          ?get=B25070_010E,...,B25070_001E&for=county:*&in=state:*
    url = ('https://api.census.gov/data/' + str(acs_year) + '/acs/acs5'
           + '?get=' + ','.join(code for code, _ in fields)
           + '&for=county:*'
           + '&in=state:*')

    # Without a timeout a stalled connection would block this request
    # forever; raise_for_status() surfaces API errors as HTTP errors
    # instead of as an opaque JSON decoding failure.
    response = requests.get(url=url + api_key, timeout=60)
    response.raise_for_status()

    df = pd.DataFrame(response.json())
    df.columns = count_columns + ['state fips', 'county fips']
    # The first row of the response is the header row, not data.
    df.drop([0], inplace=True)

    # Convert the count columns once; the raw (string) columns are left
    # untouched in df so the sheet output is unchanged.
    counts = df[count_columns].apply(pd.to_numeric)
    total = counts['TOTAL_POPULATION_BURDENED']
    burdened = (counts['GROSS_RENT_PERCENT_INCOME_25_30'] +
                counts['GROSS_RENT_PERCENT_INCOME_30_34'] +
                counts['GROSS_RENT_PERCENT_INCOME_35_39'] +
                counts['GROSS_RENT_PERCENT_INCOME_40_49'])

    # Shares expressed as percentages, rounded to 4 decimal places.
    df['PERCENT RENT BURDENED'] = ((burdened / total) * 100).round(4)
    df['PERCENT SEVERLY RENT BURDENED'] = (
        (counts['GROSS_RENT_PERCENT_INCOME_50_PLUS'] / total) * 100).round(4)

    df['fips'] = df['state fips'] + df['county fips']
    df['state'] = df['state fips'].apply(us.states.lookup)
    df['county'] = df['fips'].apply(lambda code: fips_dict[code])

    # Discard every row that still has a missing value in any column.
    df.dropna(inplace=True)

    sheet = google.worksheet_by_title_wrapper(wb, 'viz burden data')
    google.clear_wrapper(sheet)
    google.set_dataframe_wrapper(sheet, df, (1, 1))
    return 'ayyyyy'
Esempio n. 2
0
# County FIPS code -> county name for the Oregon counties (state FIPS 41).
_OREGON_COUNTIES = {
    "001": "Baker",
    "003": "Benton",
    "005": "Clackamas",
    "007": "Clatsop",
    "009": "Columbia",
    "011": "Coos",
    "013": "Crook",
    "015": "Curry",
    "017": "Deschutes",
    "019": "Douglas",
    "021": "Gilliam",
    "023": "Grant",
    "025": "Harney",
    "027": "Hood River",
    "029": "Jackson",
    "031": "Jefferson",
    "033": "Josephine",
    "035": "Klamath",
    "037": "Lake",
    "039": "Lane",
    "041": "Lincoln",
    "043": "Linn",
    "045": "Malheur",
    "047": "Marion",
    "049": "Morrow",
    "051": "Multnomah",
    "053": "Polk",
    "055": "Sherman",
    "057": "Tillamook",
    "059": "Umatilla",
    "061": "Union",
    "063": "Wallowa",
    "065": "Wasco",
    "067": "Washington",
    "069": "Wheeler",
    "071": "Yamhill",
}

# The three counties shown on the household-income and historic-rent sheets.
_TRACKED_COUNTIES = {"013": "Crook", "017": "Deschutes", "031": "Jefferson"}

_OREGON_STATE_FIPS = '41'
_FIRST_TREND_YEAR = 2011
_REQUEST_TIMEOUT_SECONDS = 60

# ACS variables, gross rent as a percentage of household income.
_RENT_TOTAL = 'B25070_001E'
_RENT_25_TO_49 = ('B25070_006E', 'B25070_007E', 'B25070_008E', 'B25070_009E')
_RENT_50_PLUS = 'B25070_010E'
_MEDIAN_INCOME = 'B06011_001E'

# Sheet headers for ACS variables B19001_002E .. B19001_017E, in that order.
_INCOME_BRACKETS = [
    'Less than $10,000', '$10,000 to $14,999', '$15,000 to $19,999',
    '$20,000 to $24,999', '$25,000 to $29,999', '$30,000 to $34,999',
    '$35,000 to $39,999', '$40,000 to $44,999', '$45,000 to $49,999',
    '$50,000 to $59,999', '$60,000 to $74,999', '$75,000 to $99,999',
    '$100,000 to $124,999', '$125,000 to $149,999', '$150,000 to $199,999',
    '$200,000 or more',
]


def _oregon_acs5_url(year, variables, counties, api_key):
    """Build an ACS 5-year query URL for `variables` in the given Oregon counties."""
    return ('https://api.census.gov/data/' + str(year) + '/acs/acs5'
            + '?get=' + ','.join(variables)
            + '&for=county:' + ','.join(counties)
            + '&in=state:' + _OREGON_STATE_FIPS
            + api_key)


def _fetch_data_rows(url):
    """GET `url` and return the decoded JSON rows without the leading header row."""
    # A timeout keeps a stalled connection from blocking the request forever;
    # raise_for_status() reports API errors as HTTP errors rather than as a
    # JSON decoding failure.
    response = requests.get(url=url, timeout=_REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    return response.json()[1:]


def _replace_sheet(google, workbook, title, placements):
    """Clear worksheet `title` and write each (dataframe, top-left cell) pair to it."""
    sheet = google.worksheet_by_title_wrapper(workbook, title)
    google.clear_wrapper(sheet)
    for frame, cell in placements:
        google.set_dataframe_wrapper(sheet, frame, cell)


def _current_burden_frame(year, api_key):
    """Return rent-burden percentages for every Oregon county for `year`."""
    count_columns = [
        'GROSS_RENT_PERCENT_INCOME_50_PLUS', 'GROSS_RENT_PERCENT_INCOME_25_30',
        'GROSS_RENT_PERCENT_INCOME_30_34', 'GROSS_RENT_PERCENT_INCOME_35_39',
        'GROSS_RENT_PERCENT_INCOME_40_49', 'TOTAL_POPULATION_BURDENED',
    ]
    # Request order must match count_columns; the API appends state and county.
    variables = [_RENT_50_PLUS] + list(_RENT_25_TO_49) + [_RENT_TOTAL]
    rows = _fetch_data_rows(_oregon_acs5_url(year, variables, ['*'], api_key))

    raw = pd.DataFrame(rows, columns=count_columns + ['state', 'county'])
    # Keep the 1-based row labels the frame had when the header row was
    # loaded and then dropped.
    raw.index = range(1, len(raw) + 1)

    counts = raw[count_columns].apply(pd.to_numeric)
    total = counts['TOTAL_POPULATION_BURDENED']
    burdened = (counts['GROSS_RENT_PERCENT_INCOME_25_30'] +
                counts['GROSS_RENT_PERCENT_INCOME_30_34'] +
                counts['GROSS_RENT_PERCENT_INCOME_35_39'] +
                counts['GROSS_RENT_PERCENT_INCOME_40_49'])

    frame = raw[['TOTAL_POPULATION_BURDENED']].copy()
    frame['PERCENT RENT BURDENED'] = (burdened / total) * 100
    frame['PERCENT SEVERLY RENT BURDENED'] = (
        counts['GROSS_RENT_PERCENT_INCOME_50_PLUS'] / total) * 100
    frame['COUNTY FIPS'] = raw['county']
    frame['COUNTY NAME'] = raw['county'].map(_OREGON_COUNTIES)
    return frame


def _household_income_frames(year, api_key):
    """Return (county-name frame, per-county normalized income-bracket frame)."""
    variables = ['B19001_{:03d}E'.format(n)
                 for n in range(2, len(_INCOME_BRACKETS) + 2)]
    # All brackets are fetched in one request instead of one request per
    # bracket; the response carries one row per county either way.
    url = _oregon_acs5_url(year, variables, sorted(_TRACKED_COUNTIES), api_key)

    # Pre-seed the keys so the row order is fixed regardless of the order in
    # which the API returns the counties.
    household_incomes = {name: [] for name in _TRACKED_COUNTIES.values()}
    for row in _fetch_data_rows(url):
        # Each row is the requested values followed by state and county FIPS.
        county_name = _TRACKED_COUNTIES[row[-1]]
        household_incomes[county_name] = [
            int(value) for value in row[:len(variables)]
        ]

    incomes = pd.DataFrame.from_dict(household_incomes, orient='index')
    incomes.columns = _INCOME_BRACKETS
    # Each county's brackets are scaled by that county's largest bracket.
    # This should maybe be changed to min/max normalization.
    normalized = incomes.div(incomes.max(axis=1), axis=0)

    counties = pd.DataFrame({'county': list(_TRACKED_COUNTIES.values())},
                            index=list(_TRACKED_COUNTIES))
    return counties, normalized


def _historic_burden_frame(last_year, api_key):
    """Return yearly rent-burden shares and median income for the tracked counties."""
    variables = ([_RENT_TOTAL, _RENT_50_PLUS] + list(_RENT_25_TO_49)
                 + [_MEDIAN_INCOME])
    records = {name: [] for name in _TRACKED_COUNTIES.values()}

    for year in range(_FIRST_TREND_YEAR, last_year + 1):
        url = _oregon_acs5_url(year, variables, sorted(_TRACKED_COUNTIES),
                               api_key)
        for row in _fetch_data_rows(url):
            # Row layout follows `variables`, then state FIPS, then county FIPS.
            total, severe, *moderate, median_income, _state, county = row
            total = int(total)
            county_name = _TRACKED_COUNTIES[county]
            records[county_name].append({
                # 25-49.9% of income spent on rent, matching
                # 'PERCENT RENT BURDENED' on the 'viz burden data' sheet.
                'rent burdened':
                    100 * sum(int(count) for count in moderate) / total,
                # 50% or more of income spent on rent.
                'severe rent burdened': 100 * (int(severe) / total),
                'median income': float(int(median_income)),
                'county': county_name,
                'year': year,
            })

    # Rows are grouped by county, then ordered by year within each county.
    ordered = [record
               for name in _TRACKED_COUNTIES.values()
               for record in records[name]]
    return pd.DataFrame(ordered, columns=[
        'rent burdened', 'severe rent burdened', 'median income', 'county',
        'year'
    ])


def update_sheet():
    """Rebuild the three visualization worksheets from ACS 5-year data.

    Reads ``year`` and ``pword`` from ``request.args``, validates both via
    ``Params`` and then overwrites, in order:

    * 'viz burden data' -- rent-burden percentages for every Oregon county;
    * 'viz household income data' -- normalized household-income brackets
      for Crook, Deschutes and Jefferson counties;
    * 'viz historic rent data' -- yearly rent-burden shares and median
      income for those three counties from 2011 up to the requested year.

    Returns:
        The rendered 'dashboard.html' template.

    Raises:
        requests.HTTPError: if the Census API answers with an error status.
        requests.Timeout: if the Census API does not answer in time.
    """
    params = Params()
    census = Census()
    google = Google()

    query = request.args
    # Look up 'year' before validating the password so a missing parameter
    # is reported in the same order as before.
    raw_year = query['year']
    params.pword_validate(str(query['pword']))
    acs_year = int(raw_year)
    params.year_validate(acs_year)

    api = google.auth('FLASK_ENV')
    wb = google.open_workbook(api, 'FLASK_ENV')

    # NOTE(review): the key is concatenated straight onto each URL, so
    # get_census_api_key() presumably returns a ready-made '&key=...'
    # fragment -- confirm against the Census helper.
    api_key = census.get_census_api_key()

    burden_df = _current_burden_frame(acs_year, api_key)
    _replace_sheet(google, wb, 'viz burden data', [(burden_df, (1, 1))])

    counties_df, normalized_df = _household_income_frames(acs_year, api_key)
    _replace_sheet(google, wb, 'viz household income data',
                   [(counties_df, (1, 1)), (normalized_df, (1, 2))])

    historic_df = _historic_burden_frame(acs_year, api_key)
    _replace_sheet(google, wb, 'viz historic rent data',
                   [(historic_df, (1, 1))])

    return render_template('dashboard.html')