def test_wdi_search(self):
    """Smoke-test ``search`` for GDP-per-capita indicators.

    The check is deliberately loose: it only requires that the result has
    a ``name`` column in which at least one entry contains 'GDP'. The exact
    rows returned are not pinned down.
    """
    matches = search('gdp.*capita.*constant')
    has_gdp_name = matches.name.str.contains('GDP').any()
    self.assertTrue(has_gdp_name)
def test_wdi_search(self):
    """Check ``search`` rows from label 6716 onward against fixed values.

    Only the ``id`` and ``name`` columns are compared.
    """
    indicator_ids = {
        6716: u('NY.GDP.PCAP.KD'),
        6718: u('NY.GDP.PCAP.KN'),
        6720: u('NY.GDP.PCAP.PP.KD'),
    }
    indicator_names = {
        6716: u('GDP per capita (constant 2005 US$)'),
        6718: u('GDP per capita (constant LCU)'),
        6720: u('GDP per capita, PPP (constant 2011 '
                'international $)'),
    }
    expected_columns = {u('id'): indicator_ids, u('name'): indicator_names}

    matches = search('gdp.*capita.*constant')
    result = matches.loc[6716:, ['id', 'name']]

    expected = pandas.DataFrame(expected_columns)
    # Reuse the result's index so the comparison is about cell values only.
    expected.index = result.index
    assert_frame_equal(result, expected)
def test_wdi_search():
    """Compare the first two ``search`` columns against fixed id/name pairs.

    Skipped unconditionally: the ``raise`` below fires before any lookup is
    made, so the rest of the body only records the intended check.
    """
    raise nose.SkipTest

    expected = {
        'id': {
            2634: 'GDPPCKD',
            4649: 'NY.GDP.PCAP.KD',
            4651: 'NY.GDP.PCAP.KN',
            4653: 'NY.GDP.PCAP.PP.KD',
        },
        'name': {
            2634: 'GDP per Capita, constant US$, millions',
            4649: 'GDP per capita (constant 2000 US$)',
            4651: 'GDP per capita (constant LCU)',
            4653: 'GDP per capita, PPP (constant 2005 international $)',
        },
    }
    # Positional slice of the first two columns. ``.iloc`` is used instead
    # of the deprecated ``.ix`` indexer, which newer pandas no longer has.
    result = search('gdp.*capita.*constant').iloc[:, :2]
    expected = pandas.DataFrame(expected)
    # Reuse the result's index so the comparison is about cell values only.
    expected.index = result.index
    assert_frame_equal(result, expected)
def test_wdi_search():
    """Compare the first two ``search`` columns against fixed id/name pairs.

    Skipped unconditionally: the ``raise`` below fires before any lookup is
    made, so the rest of the body only records the intended check.
    """
    raise nose.SkipTest

    expected = {
        u'id': {
            2634: u'GDPPCKD',
            4649: u'NY.GDP.PCAP.KD',
            4651: u'NY.GDP.PCAP.KN',
            4653: u'NY.GDP.PCAP.PP.KD',
        },
        u'name': {
            2634: u'GDP per Capita, constant US$, millions',
            4649: u'GDP per capita (constant 2000 US$)',
            4651: u'GDP per capita (constant LCU)',
            4653: u'GDP per capita, PPP (constant 2005 international $)',
        },
    }
    # Positional slice of the first two columns. ``.iloc`` is used instead
    # of the deprecated ``.ix`` indexer, which newer pandas no longer has.
    result = search('gdp.*capita.*constant').iloc[:, :2]
    expected = pandas.DataFrame(expected)
    # Reuse the result's index so the comparison is about cell values only.
    expected.index = result.index
    assert_frame_equal(result, expected)
def test_wdi_search():
    """Compare the first two ``search`` columns against fixed id/name pairs.

    Skipped unconditionally: the ``raise`` below fires before any lookup is
    made, so the rest of the body only records the intended check.
    """
    raise nose.SkipTest("skipping for now")

    expected = {
        u('id'): {
            2634: u('GDPPCKD'),
            4649: u('NY.GDP.PCAP.KD'),
            4651: u('NY.GDP.PCAP.KN'),
            4653: u('NY.GDP.PCAP.PP.KD'),
        },
        u('name'): {
            2634: u('GDP per Capita, constant US$, '
                    'millions'),
            4649: u('GDP per capita (constant 2000 US$)'),
            4651: u('GDP per capita (constant LCU)'),
            4653: u('GDP per capita, PPP (constant 2005 '
                    'international $)'),
        },
    }
    # Positional slice of the first two columns. ``.iloc`` is used instead
    # of the deprecated ``.ix`` indexer, which newer pandas no longer has.
    result = search('gdp.*capita.*constant').iloc[:, :2]
    expected = pandas.DataFrame(expected)
    # Reuse the result's index so the comparison is about cell values only.
    expected.index = result.index
    assert_frame_equal(result, expected)
def test_wdi_search(self):
    """Verify id/name of the ``search`` rows labelled 6716 and later."""
    # One (row label, indicator id, indicator name) triple per expected row.
    rows = [
        (6716, u('NY.GDP.PCAP.KD'),
         u('GDP per capita (constant 2005 US$)')),
        (6718, u('NY.GDP.PCAP.KN'),
         u('GDP per capita (constant LCU)')),
        (6720, u('NY.GDP.PCAP.PP.KD'),
         u('GDP per capita, PPP (constant 2011 '
           'international $)')),
    ]
    by_column = {
        u('id'): dict((label, ident) for label, ident, _ in rows),
        u('name'): dict((label, title) for label, _, title in rows),
    }

    found = search('gdp.*capita.*constant')
    result = found.loc[6716:, ['id', 'name']]

    expected = pandas.DataFrame(by_column)
    # Align on the result's own index; only the cell contents are under test.
    expected.index = result.index
    assert_frame_equal(result, expected)
def test_wdi_search():
    """Compare the first two ``search`` columns against fixed id/name pairs.

    Skipped unconditionally: the ``raise`` below fires before any lookup is
    made, so the rest of the body only records the intended check.
    """
    raise nose.SkipTest("skipping for now")

    expected = {
        u('id'): {
            2634: u('GDPPCKD'),
            4649: u('NY.GDP.PCAP.KD'),
            4651: u('NY.GDP.PCAP.KN'),
            4653: u('NY.GDP.PCAP.PP.KD'),
        },
        u('name'): {
            2634: u('GDP per Capita, constant US$, '
                    'millions'),
            4649: u('GDP per capita (constant 2000 US$)'),
            4651: u('GDP per capita (constant LCU)'),
            4653: u('GDP per capita, PPP (constant 2005 '
                    'international $)'),
        },
    }
    # Positional slice of the first two columns. ``.iloc`` is used instead
    # of the deprecated ``.ix`` indexer, which newer pandas no longer has.
    result = search('gdp.*capita.*constant').iloc[:, :2]
    expected = pandas.DataFrame(expected)
    # Reuse the result's index so the comparison is about cell values only.
    expected.index = result.index
    assert_frame_equal(result, expected)
df

### Some Data Munging Tools: Append, Concat, Group By

# Let's download some of the World Bank's World Development Indicators.
#
# > This example is largely based on the "World Bank" section of the
# > *pandas 0.13.1 documentation*, available
# > [here](http://pandas.pydata.org/pandas-docs/stable/remote_data.html),
# > but was expanded to demonstrate more methods and functions.

# First, we download a GDP per capita series and a fertility rate series.
# The search function shows the available series.

# In[154]:

from pandas.io import wb

wb.search('fertility').iloc[:, :2]

# Let's choose two series: one for GDP per capita and another for the
# Total Fertility Rate. We request all the available countries and some
# years.

# In[155]:

ind = ['NY.GDP.PCAP.KD', 'SP.DYN.TFRT.IN']
df = wb.download(
    indicator=ind,
    country='all',
    start=1950,
    end=2014,
)

# Shorten the column labels, and let's see the dataframe. It has a
# MultiIndex (or hierarchical index).

# In[156]:

df.columns = ['gdp', 'tfr']
""" import spss ??? #%% import pandas.io.data as web import datetime as dt import matplotlib.pylab as plt OLD PROGRAM FROM HERE """ 1. Read in GDP per capita """ from pandas.io import wb wb.search('gdp.*capita.*const').iloc[:,:2] dat = wb.download(indicator='NY.GDP.PCAP.KD', country=['US', 'CA', 'MX'], start=2005, end=2008) dat['NY.GDP.PCAP.KD'].groupby(level=0).mean() wb.search('cell.*%').iloc[:,:2] ind = ['NY.GDP.PCAP.KD', 'IT.MOB.COV.ZS'] dat = wb.download(indicator=ind, country='all', start=2011, end=2011).dropna() dat.columns = ['gdp', 'cellphone'] """ 2. Read in complete csv (see Sargent-Stachurski) """ #%%
# Attach the price feeds for TSLA and cache the result.
tsla.add_feed(YahooFinanceFT('TSLA'))
tsla.add_feed(GoogleFinanceFT('TSLA'))
#tsla.add_feed(QuandlFT('GOOG/NASDAQ_TSLA', fieldname='Close'))
tsla.cache()

# Create (or overwrite) the CPI symbol, describe it, and cache it.
cpi = sm.create('CPI', overwrite=True)
cpi.add_tags(['Consumer', 'Price Index', 'Seasonally Adjusted'])
cpi.set_description("Consumer Price Index for All Urban Consumers: All Items")
cpi.add_meta(Geography='USA', Factor='Inflation', Publisher="BLS")
cpi.set_units("MoM")
cpi.add_feed(StLouisFEDFT('CPIAUCSL'))
cpi.cache()

from pandas.io import wb

# Look up the search-result row of the NY.GDP.MKTP.CD indicator.
results = wb.search('GDP*')
results = results[results.id == 'NY.GDP.MKTP.CD']

# Transposing and calling to_dict() maps each row label to a
# {column: value} dict; take the first one. list() is required because
# dict.values() cannot be indexed on Python 3.
r = list(results.T.to_dict().values())[0]
# Replace "source" with "WB" in every key. items() works on both
# Python 2 and Python 3, unlike iteritems().
r = {key.replace("source", "WB"): value for key, value in r.items()}

ctrycodes = [
    'ABW', 'AFG', 'AGO', 'ALB', 'AND', 'ARE', 'ARG', 'ARM', 'ASM', 'ATG',
    'AUS', 'AUT', 'AZE', 'BDI', 'BEL', 'BEN', 'BFA', 'BGD', 'BGR', 'BHR',
    'BHS', 'BIH', 'BLR', 'BLZ', 'BMU', 'BOL', 'BRA', 'BRB', 'BRN', 'BTN',
    'BWA', 'CAF', 'CAN', 'CHE', 'CHL', 'CHN', 'CIV', 'CMR', 'COD', 'COG',
    'COL', 'COM', 'CPV', 'CRI', 'CUB', 'CUW', 'CYM', 'CYP', 'CZE', 'DEU',
    'DJI', 'DMA', 'DNK', 'DOM', 'DZA', 'ECU', 'EGY', 'ERI', 'ESP', 'EST',
    'ETH', 'FIN', 'FJI', 'FRA', 'FRO', 'FSM', 'GAB', 'GBR', 'GEO', 'GHA',
    'GIN', 'GMB', 'GNB', 'GNQ', 'GRC', 'GRD', 'GRL', 'GTM', 'GUM', 'GUY',
    'HKG', 'HND', 'HRV', 'HTI', 'HUN', 'IDN', 'IMN', 'IND', 'IRL', 'IRN',
    'IRQ', 'ISL', 'ISR', 'ITA', 'JAM', 'JOR', 'JPN', 'KAZ', 'KEN', 'KGZ',
    'KHM', 'KIR', 'KNA', 'KOR', 'KWT', 'LAO', 'LBN', 'LBR', 'LBY', 'LCA',
    'LIE', 'LKA', 'LSO', 'LTU', 'LUX', 'LVA', 'MAC', 'MAF', 'MAR', 'MCO',
    'MDA', 'MDG', 'MDV', 'MEX', 'MHL', 'MKD', 'MLI', 'MLT', 'MMR', 'MNE',
    'MNG', 'MNP', 'MOZ', 'MRT', 'MUS', 'MWI', 'MYS', 'NAM', 'NCL', 'NER',
    'NGA', 'NIC', 'NLD', 'NOR', 'NPL', 'NZL', 'OMN', 'PAK', 'PAN', 'PER',
    'PHL', 'PLW', 'PNG', 'POL', 'PRI', 'PRK', 'PRT', 'PRY', 'PSE', 'PYF',
    'QAT', 'ROU', 'RUS', 'RWA', 'SAU', 'SDN', 'SEN', 'SGP', 'SLB', 'SLE',
    'SLV', 'SMR', 'SOM', 'SRB', 'SSD', 'STP', 'SUR', 'SVK', 'SVN', 'SWE',
    'SWZ', 'SXM', 'SYC', 'SYR', 'TCA', 'TCD', 'TGO', 'THA', 'TJK', 'TKM',
    'TLS', 'TON', 'TTO', 'TUN', 'TUR', 'TUV', 'TZA', 'UGA', 'UKR', 'URY',
    'USA', 'UZB', 'VCT', 'VEN', 'VIR', 'VNM', 'VUT', 'WSM', 'YEM', 'ZAF',
    'ZMB', 'ZWE',
]

badlist = []

for cc in ctrycodes:
    # Work on a per-country copy so the shared metadata dict is not mutated.
    meta = dict(r)
    tickr = "GDP_" + cc
    wbi = sm.create(tickr, overwrite=True)

    # Awkward, but this is the only way to get this from the API: take the
    # first label of the outer index level of a download for this country.
    # NOTE(review): presumably the country's display name - confirm.
    country = wb.download(indicator='NY.GDP.MKTP.CD', country=cc).index.levels[0][0]
def search_wb(query):
    """Run ``wb.search`` for *query* and keep a fixed set of columns.

    *query* is forwarded unchanged to ``wb.search``. The result is narrowed
    to the ``id``, ``name`` and ``source`` columns, in that order.
    """
    wanted_columns = ["id", "name", "source"]
    matches = wb.search(query)
    return matches[wanted_columns]
df['b'] = df['b'].fillna(999)
df

### Some Data Munging Tools: Append, Concat, Group By

# Let's download some of the World Bank's World Development Indicators.
#
# > This example is largely based on the "World Bank" section of the
# > *pandas 0.13.1 documentation*, available
# > [here](http://pandas.pydata.org/pandas-docs/stable/remote_data.html),
# > but was expanded to demonstrate more methods and functions.

# First, we download a GDP per capita series and a fertility rate series.
# The search function shows the available series.

# In[154]:

from pandas.io import wb

wb.search('fertility').iloc[:, :2]

# Let's choose two series: one for GDP per capita and another for the
# Total Fertility Rate. We request all the available countries and some
# years.

# In[155]:

ind = ['NY.GDP.PCAP.KD', 'SP.DYN.TFRT.IN']
df = wb.download(
    indicator=ind,
    country='all',
    start=1950,
    end=2014,
)

# Shorten the column labels, and let's see the dataframe. It has a
# MultiIndex (or hierarchical index).

# In[156]:

df.columns = ['gdp', 'tfr']
df.head()