Example #1
import glob
import os

import pandas as pd

import get_bea_data as gbd
import population_cleanup as pc
import recent_pop_cleanup as rpc
import walkscore as ws
# the `ns` import is missing from the listing; the module name below is hypothetical
import numbeo_scraper as ns

# read in population (1790-2010) and RJMetrics meetup info (2013-2014), then merge the DataFrames
census_pop_df = pc.get_pop_data('data/1790-2010_MASTER.csv')
rj_df = pc.get_rj_data('data/rj_metrics.txt')
new_df = pd.concat([census_pop_df, rj_df], axis=1)

print('Census data merged with RJMetrics data!')

# clean and join Bureau of Economic Analysis (BEA) info
raw_bea = gbd.get_bea_data('http://www.bea.gov/newsreleases/regional/gdp_metro/2015/xls/gdp_metro0915.xls')
bea_df = gbd.clean_me(raw_bea)
bea_df = bea_df[:-2]  # drop the last two rows (presumably spreadsheet footnotes)
next_df = pd.concat([new_df, bea_df[bea_df['bea_2014'] > 20000]], axis=1)
print('Bureau of Economic Analysis data merged!')
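
# Hedged illustration of the mask above: bea_2014 appears to hold metro GDP
# in millions of dollars, so > 20000 keeps metros above roughly $20 billion.
# Made-up numbers:
demo_bea = pd.DataFrame({'bea_2014': [431704, 12500]},
                        index=['san_francisco', 'smalltown'])
demo_big = demo_bea[demo_bea['bea_2014'] > 20000]  # 'smalltown' is dropped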
# incorporate numbeo data:

url_prefix = 'http://www.numbeo.com/cost-of-living/region_rankings.jsp?title='
url_suffix = '&region=021'
year_list = ['2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016']

urls = ns.build_urls(year_list)
# fetch and parse one page per year so table_list lines up with year_list
soup_can = [ns.get_pages(url) for url in urls]
table_list = [ns.clean_up(soup) for soup in soup_can]
zipped = list(zip(year_list, table_list))
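
# url_prefix and url_suffix above are presumably consumed inside
# ns.build_urls, whose source is not part of this listing. A minimal sketch
# of what such a helper might look like (hypothetical implementation):
def build_urls_sketch(years, prefix=url_prefix, suffix=url_suffix):
    # one cost-of-living ranking URL per year,
    # e.g. ...region_rankings.jsp?title=2014&region=021
    return [prefix + year + suffix for year in years]
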
Example #2
import pandas as pd

import get_bea_data as gbd


def get_pop_data(file_path='data/1790-2010_MASTER.csv'):
    # signature and read_csv call reconstructed from the call site in
    # Example #1; the listing truncates everything above the drop below
    df = pd.read_csv(file_path)
    # drop identifier columns and the pre-1950 decade counts
    # 'Name_2010',
    df.drop(['Place Type', 'CityST', 'ID', 'LAT_BING', 'LON_BING', '1790',
             '1800', '1810', '1820', '1830', '1840', '1850', '1860', '1870',
             '1880', '1890', '1900', '1910', '1920', '1930', '1940'],
            axis=1, inplace=True)
    return df


def get_rj_data(file_path='data/rj_metrics.txt'):
    '''
    INPUT: File path to the RJMetrics text file
    OUTPUT: Cleaned DataFrame of the file
    '''
    rj_df = pd.read_table(file_path)
    # split "City, ST" into separate state and city columns
    rj_df['state'] = rj_df['City'].apply(lambda x: x.split(',')[-1].strip())
    rj_df['city'] = rj_df['City'].apply(lambda x: x.lower().split(',')[0])
    # normalize city names to snake_case so they can serve as the join key
    rj_df['city'] = rj_df['city'].apply(
        lambda x: x.replace(' ', '_').replace('-', '_'))
    rj_df.drop('City', axis=1, inplace=True)
    rj_df.set_index(['city'], inplace=True)
    return rj_df
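
# Worked example of the splitting logic in get_rj_data (the sample value is
# illustrative; the real layout of rj_metrics.txt is not shown in this
# listing):
#   'San Francisco, CA' -> state 'CA'            (split on ',', strip)
#                       -> city  'san francisco' (lowercase, left of ',')
#                       -> index 'san_francisco' (spaces/hyphens -> '_')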


if __name__ == '__main__':
    # population (1790-2010) and RJMetrics meetup info, joined on city
    df = get_pop_data('data/1790-2010_MASTER.csv')
    rj_df = get_rj_data('data/rj_metrics.txt')
    new_df = pd.concat([df, rj_df], axis=1)
    meetup_df = new_df[new_df['Pop'].notnull()]
    cities = list(meetup_df.index)

    # Bureau of Economic Analysis: clean and join
    url = 'http://www.bea.gov/newsreleases/regional/gdp_metro/2015/xls/gdp_metro0915.xls'
    raw_bea = gbd.get_bea_data(url)
    bea_df = gbd.clean_me(raw_bea)
    next_df = pd.concat([new_df, bea_df], axis=1)  # join on the shared city index
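
    # Illustration with made-up numbers: axis=1 aligns frames on their shared
    # index (one row per city, columns side by side), whereas axis=0 would
    # stack the frames row-wise:
    demo_pop = pd.DataFrame({'Pop': [852469]}, index=['san_francisco'])
    demo_gdp = pd.DataFrame({'bea_2014': [431704]}, index=['san_francisco'])
    demo_joined = pd.concat([demo_pop, demo_gdp], axis=1)  # 1 row, 2 columns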