def web_plotting_plots():
    """Plot MSFT closing prices as a static PNG and as interactive charts.

    Downloads MSFT quotes for 2010-01-01 through 2013-01-27 with
    ``DataReader``, saves a Matplotlib line chart of the ``Close`` column to
    ``PATH + 'MSFT.png'``, then signs in to the ``py`` plotting service and
    publishes two interactive versions of the same data.

    Relies on names defined outside this function (``dt``, ``DataReader``,
    ``plt``, ``PATH``, ``warnings``, ``py``). Returns ``None``.
    """
    # --- static chart ---
    first_day = dt.datetime(2010, 1, 1)
    last_day = dt.datetime(2013, 1, 27)
    # reset_index() turns the date index into a regular 'Date' column so it
    # can be passed as x= below.
    quotes = DataReader("MSFT", 'google', first_day, last_day).reset_index()
    figure, axes = plt.subplots()
    quotes.plot(x='Date', y='Close', grid=True, ax=axes)
    plt.savefig(PATH + 'MSFT.png', dpi=300)

    # --- interactive charts (may not work in Cloud 9) ---
    warnings.simplefilter('ignore')
    # NOTE(review): credentials are hard-coded here; consider loading them
    # from configuration instead.
    py.sign_in('Python-Demo-Account', 'gwt101uhh0')
    # Convert the Matplotlib figure to an interactive D3.js plot.
    py.iplot_mpl(figure)
    # Direct approach with Cufflinks: plot the Close series itself.
    close_by_date = quotes.set_index('Date')['Close']
    close_by_date.iplot(world_readable=True)
import pandas as pd
import matplotlib.pyplot as plt
from pandas_datareader.data import DataReader
from datetime import date

# Download the 10-year Treasury rate from FRED and plot it.
start = date(1900, 1, 1)   # DataReader's default would be Jan 1, 2010
series_code = 'DGS10'      # 10-year Treasury Rate
data_source = 'fred'       # FED Economic Data Service
data = DataReader(series_code, data_source, start)

# Inspect the result: column summary plus the first and last three rows.
data.info()
pd.concat((data.head(3), data.tail(3)))

# Replace the raw series code with a readable column label before plotting.
series_name = '10-year Treasury'
data = data.rename(columns={series_code: series_name})
data.plot(title=series_name)
plt.show()
### Visualize the long-term gold price trend

# Series code and start date for the gold price download
start = date(1968, 1, 1)
series = 'GOLDAMGBD228NLBM'

# Fetch from FRED, inspect, and plot
gold_price = DataReader(series, 'fred', start=start)
gold_price.info()
gold_price.plot()
plt.show()

### Compare labor market participation and unemployment rates

# Both series are requested in a single call, starting in 1950
start = date(1950, 1, 1)
series = ['UNRATE', 'CIVPART']
econ_data = DataReader(series, 'fred', start)
nyse['Market Capitalization'].idxmax() #Index of max value nyse['Sector'].unique() #Unique values as numpy array tech = nyse.loc[nyse.Sector == 'Technology'] #only select Technology stocks nyse.loc[nyse.Sector == 'Technology', 'Market Capitalization'].idxmax() ticker = nyse.loc[(nyse.Sector == 'Technology') & (nyse['IPO Year'] == 2017), 'Market Capitalization'].idxmax() data = DataReader(ticker, 'google') #Start: 2010/1/1 data = data.loc[:, ['Close', 'Volume']] #alternative data = data[['Close','Volume']] data.plot(title=ticker, secondary_y='Volume') plt.tight_layout() plt.show() #select the top 5 listed consumer companies xls = pd.ExcelFile('listings.xlsx') # pd.ExcelFile object exchanges = xls.sheet_names #exhcange contails the sheet names listings = [] #listings is an empty list for exchange in exchanges: listing = pd.read_excel(xls, sheetname=exchange) listing['Exchange'] = exchange listings.append(listing) #Add DataFrame to list combined_listings = pd.concat(listings) #create a DataFrame out of list consumer_services = combined_listings.loc[combined_listings.Sector ==
# Moving averages of the adjusted close over several window lengths.
# `AAPL`, `closing_df` and `rets` are expected to exist already; they are not
# defined in this excerpt.
ma_day = [5, 20, 60]
for ma in ma_day:
    column_name = f"MA for {ma} days"
    AAPL[column_name] = AAPL['Adj Close'].rolling(ma).mean()

# Price together with its moving averages on a single chart.
ma_columns = ['Adj Close'] + [f"MA for {window} days" for window in ma_day]
AAPL[ma_columns].plot(subplots=False, legend=True, figsize=(15, 6))
plt.show()

# Distribution of daily percentage returns.
AAPL['Daily Return'] = AAPL['Adj Close'].pct_change()
#AAPL['Daily Return'].plot(figsize=(15,6),legend=True)
sns.distplot(AAPL['Daily Return'].dropna(), bins=100, color='green')
plt.show()

'''
closing_df = DataReader(com_list, 'yahoo', start, end)['Adj Close']
tech_rtrn = closing_df.pct_change()
rets = tech_rtrn.dropna()
'''

closing_df.plot(figsize=(15, 6))
plt.show()

#sns.jointplot('AAPL','AMZN',tech_rtrn,kind='scatter',s=3)
# Pairwise view: histograms on the diagonal, scatter above, KDE below.
rtrnfig = sns.PairGrid(closing_df)
rtrnfig.map_diag(sns.distplot, bins=40, color='green')
rtrnfig.map_upper(plt.scatter, s=2)
rtrnfig.map_lower(sns.kdeplot, cmap='coolwarm')
plt.show()

# Correlation heatmap of the closing prices.
corr = closing_df.dropna().corr()
sns.heatmap(corr, annot=True, center=0.5, cmap='coolwarm')
plt.show()

# Mean return against standard deviation of return.
area = np.pi * 20
plt.scatter(rets.mean(), rets.std(), s=area)
# --- 10-year Treasury rate from FRED ---
series_code = 'DGS10'        # 10-year Treasury Rate
data_source = 'fred'         # FED Economic Data Service
start = date(1962, 1, 1)     # earliest available start; end date is omitted
data = DataReader(series_code, data_source, start)
data.info()

series_name = '10-year Treasury'
# Equivalent alternative: data.columns = [series_name]
data = data.rename(columns={series_code: series_name})
data.plot(title=series_name)
plt.show()

# --- Combine stock and economic data ---
start = date(2000, 1, 1)
series = 'DCOILWTICO'        # West Texas Intermediate oil price
oil = DataReader(series, 'fred', start)
ticker = 'XOM'               # Exxon Mobil Corporation
stock = DataReader(ticker, 'google', start)

# Selecting with a list of labels keeps a DataFrame (a bare 'Close' label
# would give a Series), so both inputs to concat are DataFrames.
close_frame = stock.loc[:, ['Close']]
data2 = pd.concat([close_frame, oil], axis=1)
def dynamic_factor_model_example():
    """Fit a basic and an extended dynamic factor model to four FRED series.

    Downloads IPMAN, W875RX1, CMRMTSPL and PAYEMS from FRED, converts them to
    standardized log-differences, fits ``sm.tsa.DynamicFactor`` and then
    ``ExtendedDFM`` on them, prints both summaries, and draws the estimated
    factor and the derived coincident indices against USPHCI, shading the
    periods flagged by the USREC series.

    Depends on names defined outside this function (``np``, ``pd``, ``sm``,
    ``plt``, ``DataReader``, ``ExtendedDFM``, ``compute_coincident_index``).
    Returns ``None``; all output is printed or plotted.
    """
    np.set_printoptions(precision=4, suppress=True, linewidth=120)

    # Get the datasets from FRED.
    start = '1979-01-01'
    end = '2014-12-01'
    names = ['indprod', 'income', 'sales', 'emp']
    codes = ['IPMAN', 'W875RX1', 'CMRMTSPL', 'PAYEMS']
    frames = [DataReader(code, 'fred', start=start, end=end) for code in codes]

    # An alternative construction of `sales` (CMRMT from 1997 onwards,
    # back-filled before 1997 using HMRMT growth rates) was left commented out
    # in the original and is not used.

    dta = pd.concat(frames, axis=1)
    dta.columns = names
    dta.loc[:, 'indprod':'emp'].plot(subplots=True, layout=(2, 2), figsize=(15, 6))

    # Create log-differenced series (in percent). All dln_* columns are added
    # before any std_* column so the label slice used for `endog` below covers
    # exactly the four standardized columns.
    for name in names:
        dta['dln_' + name] = np.log(dta[name]).diff() * 100

    # De-mean and standardize.
    for name in names:
        growth = dta['dln_' + name]
        dta['std_' + name] = (growth - growth.mean()) / growth.std()

    # Get the endogenous data; the first row is dropped because differencing
    # leaves it empty.
    endog = dta.loc['1979-02-01':, 'std_indprod':'std_emp']

    # Create the model. A Powell run supplies starting parameters for the
    # second fit.
    mod = sm.tsa.DynamicFactor(endog, k_factors=1, factor_order=2, error_order=2)
    initial_res = mod.fit(method='powell', disp=False)
    res = mod.fit(initial_res.params, disp=False)
    print(res.summary(separate_params=False))

    # Estimated factors.
    fig, ax = plt.subplots(figsize=(13, 3))

    # Plot the factor.
    dates = endog.index._mpl_repr()
    ax.plot(dates, res.factors.filtered[0], label='Factor')
    ax.legend()

    # Retrieve and also plot the NBER recession indicators.
    rec = DataReader('USREC', 'fred', start=start, end=end)

    def shade_recessions(axis):
        # Fill the full current y-range wherever the USREC indicator is set.
        lower, upper = axis.get_ylim()
        axis.fill_between(dates[:-3], lower, upper, rec.values[:-4, 0],
                          facecolor='k', alpha=0.1)

    shade_recessions(ax)

    # Post-estimation.
    res.plot_coefficients_of_determination(figsize=(8, 2))

    # Coincident index.
    usphci = DataReader('USPHCI', 'fred', start='1979-01-01', end='2014-12-01')['USPHCI']
    usphci.plot(figsize=(13, 3))
    dusphci = usphci.diff()[1:].values

    fig, ax = plt.subplots(figsize=(13, 3))

    # Compute the index.
    coincident_index = compute_coincident_index(mod, res, dta, usphci, dusphci)

    # Plot the index next to the published one.
    dates = endog.index._mpl_repr()
    ax.plot(dates, coincident_index, label='Coincident index')
    ax.plot(usphci.index._mpl_repr(), usphci, label='USPHCI')
    ax.legend(loc='lower right')
    shade_recessions(ax)

    # Extended dynamic factor model.
    extended_mod = ExtendedDFM(endog)
    initial_extended_res = extended_mod.fit(maxiter=1000, disp=False)
    extended_res = extended_mod.fit(initial_extended_res.params, method='nm', maxiter=1000)
    print(extended_res.summary(separate_params=False))

    extended_res.plot_coefficients_of_determination(figsize=(8, 2))

    fig, ax = plt.subplots(figsize=(13, 3))

    # Compute the index.
    extended_coincident_index = compute_coincident_index(
        extended_mod, extended_res, dta, usphci, dusphci)

    # Plot both model-based indices and the published one.
    dates = endog.index._mpl_repr()
    ax.plot(dates, coincident_index, '-', linewidth=1, label='Basic model')
    ax.plot(dates, extended_coincident_index, '--', linewidth=3, label='Extended model')
    ax.plot(usphci.index._mpl_repr(), usphci, label='USPHCI')
    ax.legend(loc='lower right')
    ax.set(title='Coincident indices, comparison')
    shade_recessions(ax)
# End of the import window. Written without a leading zero: `06` is not a
# valid integer literal in Python 3.
# `start` is expected to be defined before this point (not in this excerpt).
end = date(2017, 11, 6)

# DataReader imports data from several sources, such as Google Finance,
# Yahoo Finance, FRED and Oanda (for exchange rates).

# Example: FB data from Google.
stockFb = DataReader('fb', 'google', start, end)
type(stockFb)     # DataReader returns a pandas DataFrame
stockFb.head()
stockFb.info()

# Same request against Yahoo.
stockApl = DataReader('AAPL', 'yahoo', start, end)
stockApl.head()
stockApl.info()

# Plot the closing price.
stockApl['Close'].plot(title='APPLE')
plt.show()

# S&P 500 from FRED; with no end argument the series runs up to the
# current date.
sp500 = DataReader('SP500', 'fred', start)
sp500.tail()
sp500.plot(title='SP500')

# Save a local copy.
sp500.to_csv('SP500')
# --- Stock price ---
START_DATE = date(2018, 1, 1)
END_DATE = date(2018, 12, 31)
TICKER = 'AMZN'
DATA_SOURCE = 'yahoo'
stock_data = DataReader(TICKER, DATA_SOURCE, START_DATE, END_DATE)
stock_data['High'].plot(title=TICKER)
plt.show()

# --- Interest rates ---
# The FRED id for the 10-year Treasury rate is 'DGS10' (no hyphen).
RATE = 'DGS10'
DATA_SOURCE = 'fred'
START_DATE = date(2018, 1, 1)
rates_data = DataReader(RATE, DATA_SOURCE, START_DATE)
rates_data.head()
# Plot the rates that were just downloaded; the original re-plotted the
# stock's 'High' column under the rate's title.
rates_data.plot(title=RATE)
plt.show()

# --- Bonds and stocks ---
START_DATE = date(2008, 1, 1)
SERIES = ['BAMLHYH0A0HYM2TRIV', 'SP500']
data = DataReader(SERIES, 'fred', START_DATE)
data.plot(title=" & ".join(SERIES) + " comparison")
plt.show()
from datetime import date
from pandas_datareader.data import DataReader
import matplotlib.pyplot as plt

# Unemployment rate and labor-force participation rate from FRED since 1950.
start = date(1950, 1, 1)
series = ['UNRATE', 'CIVPART']
econ_data = DataReader(series, 'fred', start)

# Readable labels, given in the same order as the requested series codes.
econ_data.columns = ['Unemployment Rate', 'Participation Rate']

# subplots=True draws each column in its own panel.
econ_data.plot(subplots=True, title='Labor Market')
plt.show()
import numpy as np
from scipy import stats
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.graphics.api import qqplot
from pandas_datareader.data import DataReader

# Unemployment rate from FRED, starting in 1954.
dta = DataReader('UNRATE', 'fred', start='1954-01-01')
print(dta)
dta.plot(figsize=(12, 8))

# Autocorrelation (top panel) and partial autocorrelation (bottom panel).
fig = plt.figure(figsize=(12, 6))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(dta.values.squeeze(), lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(dta, lags=40, ax=ax2)

# Fit AR(2) and AR(3) models.
# NOTE(review): sm.tsa.ARMA may be unavailable in newer statsmodels releases;
# confirm against the installed version.
arma_mod20 = sm.tsa.ARMA(dta, (2, 0)).fit(disp=False)
print(arma_mod20.params)
arma_mod30 = sm.tsa.ARMA(dta, (3, 0)).fit(disp=False)

# Residual diagnostics for the AR(3) fit: normality test and Q-Q plot.
resid = arma_mod30.resid
stats.normaltest(resid)
fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot(111)
fig = qqplot(resid, line='q', ax=ax, fit=True)

# Dynamic prediction from 1990 to 2012. The variable name is kept as in the
# original, although the data here is the unemployment rate.
predict_sunspots = arma_mod30.predict('1990', '2012', dynamic=True)
print(predict_sunspots)

fig, ax = plt.subplots(figsize=(12, 6))
# `nyse` is expected to be defined before this point (not in this excerpt).
nyse.info()   # original note: "Index: 3147 entries, JNJ to EAE"

# Search by idxmax(): index label of the largest market capitalization
# (original note: JNJ).
nyse['Market Capitalization'].idxmax()

#### Get data for largest tech company with 2017 IPO
# Multiple conditions need parentheses and the element-wise `&` operator.
# Fixed: the column label was misspelled ('Market Captitalization') and the
# IPO year was 2007 although this section targets 2017.
ticker = nyse.loc[
    (nyse.Sector == 'Technology') & (nyse['IPO Year'] == 2017),
    'Market Capitalization',
].idxmax()

data = DataReader(ticker, 'google')   # start defaults to 2010-01-01

# Keep only the close and volume columns.
data = data.loc[:, ['Close', 'Volume']]

#### Plotting these data
import matplotlib.pyplot as plt

# secondary_y puts Volume on the right-hand axis with its own scale.
data.plot(title=ticker, secondary_y='Volume')
plt.tight_layout()   # reduce white space around the plot
plt.show()

####################
import pandas as pd
from pandas_datareader.data import DataReader
from datetime import date

start = date(2015, 1, 1)    # default: Jan 1, 2010
end = date(2016, 12, 31)    # default: today
ticker = 'GOOG'
data_source = 'google'
stock_data = DataReader(ticker, data_source, start, end)
stock_data.info()
pd.concat([stock_data.head(3), stock_data.tail(3)])
# 3. Fix all parameters (so that no parameters are estimated)
#

from importlib import reload
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt

from pandas_datareader.data import DataReader

# To illustrate, we will use the Consumer Price Index for Apparel, which
# has a time-varying level and a strong seasonal component.
# The download is re-indexed to month-start ('MS') frequency before plotting.
endog = DataReader('CPIAPPNS', 'fred', start='1980')
endog = endog.asfreq('MS')
endog.plot(figsize=(15, 3))

# It is well known (e.g. Harvey and Jaeger [1993]) that the HP filter
# output can be generated by an unobserved components model given certain
# restrictions on the parameters.
#
# The unobserved components model is:
#
# $$
# \begin{aligned}
# y_t & = \mu_t + \varepsilon_t & \varepsilon_t \sim N(0,
# \sigma_\varepsilon^2) \\
# \mu_t &= \mu_{t-1} + \beta_{t-1} + \eta_t & \eta_t \sim N(0,
# \sigma_\eta^2) \\
# \beta_t &= \beta_{t-1} + \zeta_t & \zeta_t \sim N(0, \sigma_\zeta^2) \\
# \end{aligned}
# ## Coincident Index # # As described above, the goal of this model was to create an # interpretable series which could be used to understand the current status # of the macroeconomy. This is what the coincident index is designed to do. # It is constructed below. For readers interested in an explanation of the # construction, see Kim and Nelson (1999) or Stock and Watson (1991). # # In essence, what is done is to reconstruct the mean of the (differenced) # factor. We will compare it to the coincident index published by the # Federal Reserve Bank of Philadelphia (USPHCI on FRED). usphci = DataReader('USPHCI', 'fred', start='1979-01-01', end='2014-12-01')['USPHCI'] usphci.plot(figsize=(13, 3)) dusphci = usphci.diff()[1:].values def compute_coincident_index(mod, res): # Estimate W(1) spec = res.specification design = mod.ssm['design'] transition = mod.ssm['transition'] ss_kalman_gain = res.filter_results.kalman_gain[:, :, -1] k_states = ss_kalman_gain.shape[0] W1 = np.linalg.inv( np.eye(k_states) - np.dot(np.eye(k_states) - np.dot(ss_kalman_gain, design), transition)
# ## Coincident Index # # As described above, the goal of this model was to create an # interpretable series which could be used to understand the current status # of the macroeconomy. This is what the coincident index is designed to do. # It is constructed below. For readers interested in an explanation of the # construction, see Kim and Nelson (1999) or Stock and Watson (1991). # # In essence, what is done is to reconstruct the mean of the (differenced) # factor. We will compare it to the coincident index published by the # Federal Reserve Bank of Philadelphia (USPHCI on FRED). usphci = DataReader( 'USPHCI', 'fred', start='1979-01-01', end='2014-12-01')['USPHCI'] usphci.plot(figsize=(13, 3)) dusphci = usphci.diff()[1:].values def compute_coincident_index(mod, res): # Estimate W(1) spec = res.specification design = mod.ssm['design'] transition = mod.ssm['transition'] ss_kalman_gain = res.filter_results.kalman_gain[:, :, -1] k_states = ss_kalman_gain.shape[0] W1 = np.linalg.inv( np.eye(k_states) - np.dot(np.eye(k_states) - np.dot(ss_kalman_gain, design), transition)
# Unemployment and participation rates from FRED since 1950.
# `date`, `DataReader`, `plt` and `ecdf` are expected to be defined before
# this point (not in this excerpt).
start = date(1950, 1, 1)
series = ['UNRATE', 'CIVPART']
econ_data = DataReader(series, 'fred', start)

# Assign readable column labels and move the date index into a 'DATE' column.
econ_data.columns = ['Unemployment Rate', 'Participation Rate']
econ_data = econ_data.reset_index()

# Decade label built from the third character of the date string,
# e.g. '1950-01-01' -> '5' -> '50s'.
econ_data['decade'] = econ_data['DATE'].astype(str).str[2:3] + "0s"

# Plot econ_data, one panel per column.
_ = econ_data.plot(subplots=True, title='Labor Market')
plt.margins(.2)
plt.show()

# Empirical CDF of the unemployment rate.
data = econ_data['Unemployment Rate']
x, y = ecdf(data)
_ = plt.plot(x, y, marker='.', linestyle='none')
_ = plt.xlabel('Unemployment Rate')
_ = plt.ylabel('CDF')
_ = plt.margins(0.2)
plt.show()

# Scatter of participation rate against unemployment rate.
_ = plt.plot(econ_data['Unemployment Rate'], econ_data['Participation Rate'],
             marker='.', linestyle='none')
# Fixed: the x-axis shows the unemployment rate; it was labelled
# 'Participation Rate'.
_ = plt.xlabel('Unemployment Rate')
_ = plt.ylabel('Participation Rate')
_ = plt.margins(0.2)
plt.show()

# Exercise 1: gold price.
#  - start is January 1, 1968; the series code is 'GOLDAMGBD228NLBM'.
#  - Download from 'fred' with DataReader() into gold_price.
#  - Inspect with .info(), then plot with the title 'Gold Price'.
start = date(1968, 1, 1)
series = 'GOLDAMGBD228NLBM'
gold_price = DataReader(series, 'fred', start=start)
gold_price.info()
gold_price.plot(title="Gold Price")
plt.show()

# Exercise 2: labor market.
#  - start is January 1, 1950; series lists 'UNRATE' and 'CIVPART', in that
#    order.
#  - Download from 'fred' with DataReader() into econ_data.
#  - The exercise continues with relabelling the columns as
#    'Unemployment Rate' and 'Participation Rate' and plotting with
#    subplots=True under the title 'Labor Market'; those steps are not part
#    of this excerpt.
start = date(1950, 1, 1)
series = ['UNRATE', 'CIVPART']
econ_data = DataReader(series, 'fred', start=start)
import matplotlib.pyplot as plt
from pandas_datareader.data import DataReader
# date objects for the import window
from datetime import date

# Time period of the import. Day and month are written without leading
# zeros: `01` and `06` are not valid integer literals in Python 3.
start = date(2017, 10, 1)
end = date(2017, 11, 6)

# Closing prices only.
stockApl = DataReader('AAPL', 'yahoo', start, end)['Close']
stockApl.head()

# Plot the closing price.
stockApl.plot(title='APPLE')
plt.show()

# Daily returns of the stock.
dr_apl = stockApl.pct_change(1)

# Encode for comparison: negative returns become 0, positive returns become 4.
# Order matters: after the first assignment no value is negative, so the
# second only touches the originally positive returns.
dr_apl[dr_apl < 0] = 0
dr_apl[dr_apl > 0] = 4

# Drop the first element, which is NaN because it has no previous day.
dr_apl = dr_apl[1:]

# `check` is expected to be defined before this point (not in this excerpt).
check['stock'] = dr_apl
"""
Created on Tue Oct 31 23:46:25 2017

@author: James
"""
from pandas_datareader.data import DataReader
from datetime import date
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# 10-year Treasury rate from FRED since 1962; rows with missing values are
# dropped in place before plotting.
ty10 = DataReader('DGS10', 'fred', date(1962, 1, 1))
ty10.dropna(inplace=True)
ty10.plot(title='10-year Treasury')
plt.tight_layout()
plt.show()

# Distribution of the rate using seaborn (histogram with kernel density
# estimate). The plot is drawn once; the original called sns.distplot(ty10)
# twice in a row, overlaying the same plot on itself.
ax = sns.distplot(ty10)
# Mark the median with a vertical dash-dot line.
ax.axvline(ty10['DGS10'].median(), color='blue', ls='-.')
plt.close()

# Visualizing international income distributions:
# list the poorest and richest countries worldwide.
# Import the data