Example 1
0
# Merge Bureau of Economic Analysis metro-GDP figures into the working frame.
raw_bea = gbd.get_bea_data('http://www.bea.gov/newsreleases/regional/gdp_metro/2015/xls/gdp_metro0915.xls')
bea_df = gbd.clean_me(raw_bea)
# Drop the last two rows -- presumably spreadsheet footnote lines; TODO confirm.
bea_df = bea_df[:-2]
# Keep only metros whose 2014 figure exceeds 20000 (units per the BEA sheet --
# NOTE(review): verify the threshold's units against the source spreadsheet).
next_df = pd.concat([new_df, bea_df[bea_df['bea_2014'] > 20000]], axis=1)
# print statement is Python 2-only; the parenthesized call works on 2 and 3.
print('Bureau of Economic Affairs data merged!')
# incorporate numbeo data:

url_prefix = 'http://www.numbeo.com/cost-of-living/region_rankings.jsp?title='
url_suffix = '&region=021'
year_list = ['2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016']

urls = ns.build_urls(year_list)
# BUG FIX: the original loop rebound soup_can on every iteration, so only the
# pages for the final year survived into table_list (which is then zipped
# against ALL years). Collect one fetched page per URL instead.
# NOTE(review): assumes ns.get_pages(url) returns one parsed page -- confirm.
soup_can = [ns.get_pages(url) for url in urls]
table_list = [ns.clean_up(soup) for soup in soup_can]
# Pair each year with its cleaned table for frame construction.
zipped = list(zip(year_list, table_list))
df_dict = ns.build_data_frames(zipped)

for year in year_list:
    # Rebuild the column labels so each index column carries a year suffix;
    # the first two labels (rank and city) are shared across all years.
    base_columns = ns.fix_em([
        'Rank', 'City', 'Cost of Living Index', 'Rent Index',
        'Cost of Living Plus Rent Index', 'Groceries Index',
        'Restaurant Price Index', 'Local Purchasing Power Index',
    ])
    renamed = list(base_columns[:2])
    for label in base_columns[2:]:
        renamed.append(label + '_{}'.format(year))
    df_dict[year].columns = renamed

def clean_up_df(df):
    """Split 'city' into slugged city/state columns and drop 'rank', in place.

    Expects a 'city' column of "City, State" strings; mutates *df* and
    returns it for chaining.
    """
    raw_cities = df['city']
    # State is the text after the comma; strip the leading space, then slug.
    df['state'] = raw_cities.map(
        lambda entry: entry.split(',')[1].strip().lower().replace(' ', '_'))
    # City is the text before the comma (no strip, matching the original data).
    df['city'] = raw_cities.map(
        lambda entry: entry.split(',')[0].lower().replace(' ', '_'))
    # Rank is positional noise once the frames are merged; discard it.
    df.pop('rank')
    return df
Example 2
0
raw_bea = gbd.get_bea_data("http://www.bea.gov/newsreleases/regional/gdp_metro/2015/xls/gdp_metro0915.xls")
bea_df = gbd.clean_me(raw_bea)
bea_df = bea_df[:-2]
next_df = pd.concat([new_df, bea_df[bea_df["bea_2014"] > 20000]], axis=1)
print "Bureau of Economic Affairs data merged!"
# incorporate numbeo data:

url_prefix = "http://www.numbeo.com/cost-of-living/region_rankings.jsp?title="
url_suffix = "&region=021"
year_list = ["2009", "2010", "2011", "2012", "2013", "2014", "2015", "2016"]

urls = ns.build_urls(year_list)
# BUG FIX: the original loop rebound soup_can on every iteration, so only the
# pages for the final year survived into table_list (which is then zipped
# against ALL years). Collect one fetched page per URL instead.
# NOTE(review): assumes ns.get_pages(url) returns one parsed page -- confirm.
soup_can = [ns.get_pages(url) for url in urls]
table_list = [ns.clean_up(soup) for soup in soup_can]
# Pair each year with its cleaned table for frame construction.
zipped = list(zip(year_list, table_list))
df_dict = ns.build_data_frames(zipped)

# NOTE(review): this snippet is truncated by the page it was scraped from --
# the closing ")" of the fix_em(...) call and the rest of the loop body
# (present in Example 1 above) are missing; it is not runnable as shown.
for item in year_list:
    columns = ns.fix_em(
        [
            "Rank",
            "City",
            "Cost of Living Index",
            "Rent Index",
            "Cost of Living Plus Rent Index",
            "Groceries Index",
            "Restaurant Price Index",
            "Local Purchasing Power Index",
        ]
Example 3
0
def get_walk_data(url, timeout=10):
    """Download *url*, parse it with BeautifulSoup, and return clean_up()'s result.

    Parameters
    ----------
    url : str
        Page to fetch.
    timeout : float, optional
        Seconds to wait for the HTTP response. Added because requests.get
        without a timeout can block indefinitely on a stalled server;
        the default keeps existing call sites working.

    Returns
    -------
    Whatever clean_up() produces for the page's parsed tree.
    """
    doc = requests.get(url, timeout=timeout).text
    soup = BeautifulSoup(doc, 'lxml')
    return clean_up(soup)
Example 4
0
def get_walk_data(url):
    """Fetch *url*, parse the HTML with lxml, and return the cleaned-up tree."""
    response_text = requests.get(url).text
    parsed = BeautifulSoup(response_text, 'lxml')
    return clean_up(parsed)