Ejemplo n.º 1
0
def get_state_scores(jobs, prices):
    """
    Scale jobs/state by average house price/state.

    The number of job rows per state (non-null ``id`` values) is divided by
    that state's average house price, and the ratios are rescaled so that
    the best state gets a score of 1.0.

    Parameters
    ----------
    jobs : DataFrame with at least the columns ``state`` and ``id``.
    prices : DataFrame with at least the columns ``state_long`` and
        ``average_price``.

    Returns
    -------
    dict
        Maps state -> score. States that have jobs but no usable average
        price (missing, NaN or not positive) are left out. An empty dict is
        returned when no state can be scored; if every scorable state has a
        job count of zero, each of them gets 0.0.
    """
    # Add state abbreviations to prices dataframe
    fips = get_fips_data()
    state_names = fips.groupby(
        ['state_long', 'state']).count().reset_index()[['state_long', 'state']]
    prices = pd.merge(prices, state_names, how='left', on=['state_long'])

    jobs_per_state = jobs.groupby(['state'])['id'].count().to_dict()
    prices_per_state = prices[['state', 'average_price'
                               ]].set_index('state').to_dict()['average_price']

    states = []
    scores_jobs = []
    scores_prices = []
    for state, job_count in jobs_per_state.items():
        # A state can have jobs without a price row; skip it instead of
        # failing with KeyError.
        if state not in prices_per_state:
            continue
        price = float(prices_per_state[state])
        # `price > 0` is False for NaN as well, so this also drops missing
        # prices that would otherwise poison max() and the division below.
        if not price > 0:
            continue
        states.append(state)
        scores_jobs.append(float(job_count))
        scores_prices.append(price)

    if not states:
        # max() of an empty list would raise ValueError.
        return {}

    max_job_score = max(scores_jobs)
    max_price_score = max(scores_prices)
    if max_job_score == 0:
        # Every count is zero: nothing to rank and nothing to divide by.
        return dict.fromkeys(states, 0.0)

    scores = [(x / max_job_score) / (y / max_price_score)
              for x, y in zip(scores_jobs, scores_prices)]
    max_score = max(scores)
    scores = [x / max_score for x in scores]

    return dict(zip(states, scores))
Ejemplo n.º 2
0
def get_state_scores(jobs, prices):
    """
    Scale jobs/state by average house price/state.

    Counts the job rows per state (non-null ``id`` values), divides each
    count by the state's average house price and normalises the ratios so
    that the highest one becomes 1.0.

    Parameters
    ----------
    jobs : DataFrame with at least the columns ``state`` and ``id``.
    prices : DataFrame with at least the columns ``state_long`` and
        ``average_price``.

    Returns
    -------
    dict
        Maps state -> score. A state with jobs but without a usable average
        price (no price row, NaN or a non-positive value) is omitted. The
        result is empty when no state can be scored, and all 0.0 when every
        scorable state has a job count of zero.
    """
    # Add state abbreviations to prices dataframe
    fips = get_fips_data()
    abbreviations = fips.groupby(['state_long', 'state']).count().reset_index()[['state_long', 'state']]
    prices = pd.merge(prices, abbreviations, how='left', on=['state_long'])

    jobs_per_state = jobs.groupby(['state'])['id'].count().to_dict()
    prices_per_state = prices[['state', 'average_price']].set_index('state').to_dict()['average_price']

    # Keep only states that appear in both inputs with a positive price.
    # The membership test avoids a KeyError for states without a price row;
    # the `> 0` test is also False for NaN, so missing prices are dropped.
    usable = {
        state: (float(count), float(prices_per_state[state]))
        for state, count in jobs_per_state.items()
        if state in prices_per_state and float(prices_per_state[state]) > 0
    }
    if not usable:
        # max() of an empty sequence would raise ValueError.
        return {}

    scores_jobs = [pair[0] for pair in usable.values()]
    scores_prices = [pair[1] for pair in usable.values()]

    max_job_score = max(scores_jobs)
    max_price_score = max(scores_prices)
    if max_job_score == 0:
        # All counts are zero, so the normalisation would divide by zero.
        return dict.fromkeys(usable, 0.0)

    scores = [(x / max_job_score) / (y / max_price_score) for x, y in zip(scores_jobs, scores_prices)]
    max_score = max(scores)

    return {state: score / max_score for state, score in zip(usable, scores)}
Ejemplo n.º 3
0
def get_populations_scores(populations):
    """
    Score counties by their relative population change from 2000 to 2010.

    ``populations`` is merged with the FIPS table on ``county`` and
    ``state``. For every merged row the growth
    ``population2010 / population2000 - 1`` is stored under the key
    ``(fips_state, fips_county)``, and that mapping is handed to
    ``calculate_scores``, whose result is returned unchanged.
    """
    key_columns = ['state', 'county', 'fips_state', 'fips_county']
    fips_lookup = get_fips_data()[key_columns]
    merged = pd.merge(fips_lookup, populations, on=['county', 'state'])

    county_ids = zip(merged['fips_state'], merged['fips_county'])
    relative_growth = (merged['population2010'] /
                       merged['population2000']) - 1.

    growth_per_county = dict(zip(county_ids, relative_growth))
    return calculate_scores(growth_per_county)
Ejemplo n.º 4
0
def get_houseprices_scores(houseprices):
    """
    Score counties by their (negated) average house price.

    ``houseprices`` is merged with the FIPS table on ``county`` and
    ``state``. Each merged row contributes the entry
    ``(fips_state, fips_county) -> -average_price``; the mapping is passed to
    ``calculate_scores`` and its result is returned unchanged.
    """
    key_columns = ['state', 'county', 'fips_state', 'fips_county']
    fips_lookup = get_fips_data()[key_columns]
    merged = pd.merge(fips_lookup, houseprices, on=['county', 'state'])

    county_ids = zip(merged['fips_state'], merged['fips_county'])
    # The sign is flipped before scoring, so a lower price yields a larger
    # value in the mapping.
    negated_prices = -merged['average_price'].values

    price_per_county = dict(zip(county_ids, negated_prices))
    return calculate_scores(price_per_county)
Ejemplo n.º 5
0
# Load the jobs of every profession key into one dataframe.
# NOTE: `jobs` stays None when `profession_keys` is empty, in which case the
# groupby further down fails.
jobs = None

for pk in profession_keys:
    if jobs is None:
        # First key: use its frame as the starting point.
        jobs = load_jobs(pk)
    else:
        # Later keys: append their rows and renumber the index.
        new_jobs = load_jobs(pk)
        jobs = pd.concat((jobs, new_jobs), ignore_index=True)

# state -> number of job rows with a non-null `id`
jobs_per_state = jobs.groupby(['state'])['id'].count().to_dict()


prices = get_average_prices_data()

# Add state abbreviations to prices dataframe
fips = get_fips_data()
prices = pd.merge(prices, fips.groupby(['state_long','state']).count().reset_index()[['state_long','state']], how='left', on=['state_long'])

# Try 12 price ceilings: 150,000 up to 700,000 in steps of 50,000.
for p in range(12):
    max_price = 150000. + float(p)*50000
    # state -> average price, restricted to states strictly below the ceiling
    prices_per_state = prices.loc[prices['average_price'] < max_price, ['state','average_price']].set_index('state').to_dict()['average_price']

    if len(prices_per_state) > 0:
        # Job counts of the affordable states; states without any job entry
        # are skipped.
        counts = []
        for state in prices_per_state:
            try:
                counts.append(float(jobs_per_state[state]))
            except KeyError:
                continue
    
        # NOTE(review): max() raises ValueError if none of the affordable
        # states had a job entry (counts is empty) - confirm this cannot
        # happen with the real data.
        max_jobs = max(counts)
Ejemplo n.º 6
0
# Load the jobs of every profession key into one dataframe.
# NOTE: `jobs` stays None when `profession_keys` is empty, in which case the
# groupby further down fails.
jobs = None

for pk in profession_keys:
    if jobs is None:
        # First key: use its frame as the starting point.
        jobs = load_jobs(pk)
    else:
        # Later keys: append their rows and renumber the index.
        new_jobs = load_jobs(pk)
        jobs = pd.concat((jobs, new_jobs), ignore_index=True)

# state -> number of job rows with a non-null `id`
jobs_per_state = jobs.groupby(['state'])['id'].count().to_dict()

prices = get_average_prices_data()

# Add state abbreviations to prices dataframe
fips = get_fips_data()
prices = pd.merge(
    prices,
    fips.groupby(['state_long',
                  'state']).count().reset_index()[['state_long', 'state']],
    how='left',
    on=['state_long'])

# Try 12 price ceilings: 150,000 up to 700,000 in steps of 50,000.
for p in range(12):
    max_price = 150000. + float(p) * 50000
    # state -> average price, restricted to states strictly below the ceiling
    prices_per_state = prices.loc[prices['average_price'] < max_price,
                                  ['state', 'average_price']].set_index(
                                      'state').to_dict()['average_price']

    if len(prices_per_state) > 0:
        # Collected per ceiling; the code that fills this list is not part
        # of this excerpt.
        counts = []
Ejemplo n.º 7
0
def get_houseprices_scores(houseprices):
    """
    Score counties by their (negated) average house price.

    Joins ``houseprices`` to the FIPS table on ``county`` and ``state``,
    maps every ``(fips_state, fips_county)`` pair to ``-average_price`` and
    returns whatever ``calculate_scores`` produces for that mapping.
    """
    fips = get_fips_data()
    county_codes = fips[['state', 'county', 'fips_state', 'fips_county']]
    joined = pd.merge(county_codes, houseprices, on=['county', 'state'])

    # Prices are negated before scoring, so a lower price gives a larger value.
    return calculate_scores({
        (state_code, county_code): negative_price
        for state_code, county_code, negative_price in zip(
            joined['fips_state'],
            joined['fips_county'],
            -joined['average_price'].values)
    })
Ejemplo n.º 8
0
def get_populations_scores(populations):
    """
    Score counties by their relative population change from 2000 to 2010.

    Joins ``populations`` to the FIPS table on ``county`` and ``state``,
    maps every ``(fips_state, fips_county)`` pair to
    ``population2010 / population2000 - 1`` and returns whatever
    ``calculate_scores`` produces for that mapping.
    """
    fips = get_fips_data()
    county_codes = fips[['state', 'county', 'fips_state', 'fips_county']]
    joined = pd.merge(county_codes, populations, on=['county', 'state'])

    growth = (joined['population2010'] / joined['population2000']) - 1.
    return calculate_scores({
        (state_code, county_code): change
        for state_code, county_code, change in zip(
            joined['fips_state'], joined['fips_county'], growth)
    })