def plot_candlestick() -> None:
    """Draw a candlestick chart of ABN Amro quotes for July 2018.

    The ``EURONEXT/ABN`` dataset is downloaded from Quandl, the head and tail
    of the OHLC frame are printed, and the chart is written to
    ``candlestick.png`` under ``IMGDIR``.
    """
    import matplotlib.dates as mdates
    from mpl_finance import candlestick_ohlc

    quandl.read_key()

    # Download one month of ABN Amro quotes
    quotes = quandl.get(
        'EURONEXT/ABN',
        start_date='2018-07-01',
        end_date='2018-07-31',
    )

    # The index is converted to matplotlib date numbers; the 'Last' column
    # is used in the position of the closing price
    quotes['Date'] = quotes.index.map(mdates.date2num)
    ohlc = quotes[['Date', 'Open', 'High', 'Low', 'Last']]
    summaries = (
        ('df_ohlc.head()', ohlc.head()),
        ('df_ohlc.tail()', ohlc.tail()),
    )
    for label, frame in summaries:
        print(STR_FMT.format(label, frame))

    # Draw the candles on a date-formatted x-axis
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    candlestick_ohlc(
        ax,
        ohlc.values,
        width=0.8,
        colorup='green',
        colordown='red',
    )

    # Write the chart to disk
    plt.subplots_adjust(hspace=0.75)
    fig.savefig(IMGDIR + 'candlestick.png', bbox_inches='tight')
def ema() -> None:
    """Exponential moving average of a time series.

    Downloads the ABN Amro series from Quandl, forward-fills it to a daily
    frequency, computes EMAs with spans of 5 and 30 days and saves the plot
    of both as ``ema.png`` under ``IMGDIR``.
    """
    quandl.read_key()

    # Get data of ABN Amro
    df = quandl.get('EURONEXT/ABN', column_index=4)
    print(STR_FMT.format('df.head()', df.head()))
    print(STR_FMT.format('df.tail()', df.tail()))

    # Fill in missing values on a daily basis
    df_filled = df.asfreq('D', method='ffill')
    df_last = df_filled['Last']

    # Calculate the EMA with a decay spanning 5-days and 30-days.
    # The ewm() method provides exponential weighted functions; run
    # help(df_last.ewm) in an interactive session for its parameters
    # (calling help() here would page documentation on every run).
    df_ema = pd.DataFrame({
        'short': df_last.ewm(span=5).mean(),
        'long': df_last.ewm(span=30).mean(),
    })

    # Plot the long and short window EMAs
    df_ema.plot(figsize=(12, 8))
    plt.savefig(IMGDIR + 'ema.png', bbox_inches='tight')
def plot_time_series() -> None:
    """Plot a time series using data from Quandl.

    Two images are written under ``IMGDIR``: ``dataset.png`` with every
    column of the downloaded frame, and ``time_series.png`` with the 'Last'
    price on top and the traded volume below.
    """
    quandl.read_key()

    # Get data of ABN Amro
    df = quandl.get('EURONEXT/ABN')
    print(STR_FMT.format('df.head()', df.head()))
    print(STR_FMT.format('df.tail()', df.tail()))
    df.plot()
    plt.savefig(IMGDIR + 'dataset.png', bbox_inches='tight')

    # Extract the daily closing price and volume
    prices = df['Last']
    volumes = df['Volume']
    print(STR_FMT.format('prices.head()', prices.head()))
    print(STR_FMT.format('volumes.tail()', volumes.tail()))
    print(STR_FMT.format('type(volumes)', type(volumes)))

    # Start a fresh figure: subplot2grid draws on the current figure, which
    # would otherwise still be the one holding the dataset plot above.
    fig = plt.figure(figsize=(12, 8))

    # Top plot consisting of daily closing price
    top = plt.subplot2grid((4, 4), (0, 0), rowspan=3, colspan=4)
    top.plot(prices.index, prices, label='Last')
    top.set_title('ABN Last Price from {low} - {high}'.format(
        low=prices.index[0].year, high=prices.index[-1].year))
    top.legend(loc=2)

    # The bottom plot consisting of daily trading volume
    bottom = plt.subplot2grid((4, 4), (3, 0), rowspan=1, colspan=4)
    bottom.bar(volumes.index, volumes)
    bottom.set_title('ABN Daily Trading Volume')

    # Save figure
    fig.subplots_adjust(hspace=0.75)
    fig.savefig(IMGDIR + 'time_series.png', bbox_inches='tight')
def plot_correlation() -> None:
    """Plot the rolling correlation between the returns of two datasets.

    The 'Last' columns of ``EURONEXT/ABN`` and ``EURONEXT/SANTA`` for
    2016-2017 are downloaded from Quandl, and the resulting chart is saved
    as ``correlation.png`` under ``IMGDIR``.
    """
    quandl.read_key()

    # Download both series in a single request
    prices = quandl.get(
        ['EURONEXT/ABN', 'EURONEXT/SANTA'],
        column_index=4,
        start_date='2016-01-01',
        end_date='2017-12-31',
    )
    overview = (
        ('df.head()', prices.head()),
        ('df.tail()', prices.tail()),
        ('df.describe()', prices.describe()),
    )
    for label, value in overview:
        print(STR_FMT.format(label, value))

    # Forward-fill to a daily frequency, then take day-over-day changes
    daily = prices.asfreq('D', method='ffill')
    returns = daily.pct_change()
    abn_returns = returns['EURONEXT/ABN - Last']
    santa_returns = returns['EURONEXT/SANTA - Last']

    # The rolling window spans half of the filled series
    window = len(daily.index) // 2
    print(STR_FMT.format('window', window))
    print(STR_FMT.format('abn_returns.describe()', abn_returns.describe()))
    print(STR_FMT.format('santa_returns.describe()',
                         santa_returns.describe()))

    # Correlate over full windows only and drop the leading gaps
    rolling_abn = abn_returns.rolling(window=window, min_periods=window)
    correlations = rolling_abn.corr(other=santa_returns).dropna()
    correlations.plot(figsize=(12, 8))
    plt.savefig(IMGDIR + 'correlation.png', bbox_inches='tight')
def qq_plot() -> None:
    """
    Create a Q-Q (quantile-quantile) plot.

    The daily percentage changes of the ABN Amro 'Last' series are compared
    against a normal distribution and the chart is saved as ``qq_plot.png``
    under ``IMGDIR``.

    Notes
    ----------
    This is a graphical method for comparing two probability distributions by
    plotting their quantiles against each other, for example to test if the
    daily changes of a stock are distributed normally.
    """
    from scipy import stats

    quandl.read_key()

    # Get the daily changes data
    df = quandl.get('EURONEXT/ABN', column_index=4)
    daily_changes = df.pct_change(periods=1).dropna()
    print(STR_FMT.format('daily_changes.describe()',
                         daily_changes.describe()))

    # Create the Q-Q plot against a normal distribution
    # Note that stats.probplot is the same as a Q-Q plot, however probabilities
    # are shown in the scale of the theoretical distribution (x-axis) and the
    # y-axis contains unscaled quantiles of the sample data.
    # Run help(stats.probplot) in an interactive session for the details;
    # calling help() here would page documentation on every run.
    fig, ax = plt.subplots(figsize=(8, 4))
    stats.probplot(daily_changes['Last'], dist='norm', plot=ax)
    fig.savefig(IMGDIR + 'qq_plot.png', bbox_inches='tight')
def time_series_analytics() -> None:
    """Visualise some statistical properties of a time series.

    Four images are written under ``IMGDIR``: ``pct_change.png``,
    ``cum_return.png``, ``hist.png`` and ``volatility.png``.
    """
    quandl.read_key()

    # Download the ABN Amro series
    prices = quandl.get('EURONEXT/ABN', column_index=4)
    for label, frame in (('df.head()', prices.head()),
                         ('df.tail()', prices.tail())):
        print(STR_FMT.format(label, frame))

    # Percentage change between consecutive observations
    returns = prices.pct_change(periods=1)
    print(STR_FMT.format('daily_changes.describe()', returns.describe()))
    returns.plot()
    plt.savefig(IMGDIR + 'pct_change.png', bbox_inches='tight')

    # Cumulative return, i.e. the same as "df / df['Last'][0] - 1"
    cumulative = returns.add(1).cumprod().sub(1)
    cumulative.plot()
    plt.savefig(IMGDIR + 'cum_return.png', bbox_inches='tight')

    # Distribution of the returns
    returns.hist(bins=50, figsize=(8, 4))
    plt.savefig(IMGDIR + 'hist.png', bbox_inches='tight')

    # Rolling 30-day standard deviation (volatility); missing days are
    # padded with the previous value first
    padded = prices.asfreq('D', method='ffill')
    volatility = (
        padded
        .pct_change()
        .rolling(window=30, min_periods=30)
        .std()
    )
    volatility.plot()
    plt.savefig(IMGDIR + 'volatility.png', bbox_inches='tight')
def __init__(self, path, table, date):
    """Load a Quandl dataset into ``self.data``.

    Parameters
    ----------
    path
        Key file name handed to ``quandl.read_key``.
    table
        Dataset code handed to ``quandl.get``.
    date
        Mapping with ``'start'`` and ``'end'`` entries used as the query
        range. When falsy (e.g. ``None`` or ``{}``), the range defaults to
        ``'2000-01-01'`` through today's date.
    """
    if not date:
        import datetime

        # Format the date with strftime: year/month/day are ints, so
        # joining them with '+' and '-' strings raises TypeError.
        date = {
            'start': '2000-01-01',
            'end': datetime.datetime.now().strftime('%Y-%m-%d'),
        }
    quandl.read_key(filename=path)
    self.data = quandl.get(table,
                           start_date=date['start'],
                           end_date=date['end'])
def plot():
    """Build a candlestick chart of the last 30 days of Infosys quotes.

    The ``BSE/BOM500209`` series is read through pandas_datareader's quandl
    reader, drawn with Bokeh, and the embeddable script/div pair plus the
    CDN resource lists are handed to ``render_template("plot.html", ...)``,
    whose result is returned.
    """
    from pandas_datareader import data
    import datetime
    import dateutil.relativedelta
    import quandl
    from bokeh.plotting import figure, show, output_file
    from bokeh.embed import components
    from bokeh.resources import CDN

    def _direction(close, open_):
        # Label one session by comparing its close with its open
        if close > open_:
            return "Increase"
        if close < open_:
            return "Decrease"
        return "Equal"

    month = dateutil.relativedelta.relativedelta(days=30)
    start = datetime.datetime.now() - month
    end = datetime.datetime.now()

    quandl.read_key()
    quotes = data.DataReader(
        'BSE/BOM500209',
        'quandl',
        api_key=quandl.ApiConfig.api_key,
        start=start,
        end=end,
    )

    # Derived columns that describe each candle body
    quotes["Status"] = [
        _direction(close, open_)
        for close, open_ in zip(quotes.Close, quotes.Open)
    ]
    quotes["Midpoint"] = (quotes.Open + quotes.Close) / 2
    quotes["Height"] = (quotes.Open - quotes.Close).abs()

    chart = figure(
        x_axis_type='datetime',
        width=800,
        height=300,
        sizing_mode="scale_width",
    )
    chart.title.text = "Candlestick Chart of Infosys"
    chart.grid.grid_line_alpha = 0.5

    # Candle bodies are twelve hours wide, expressed in milliseconds
    hours_12 = 12 * 60 * 60 * 1000

    # High-low wicks first, then one batch of bodies per direction
    chart.segment(quotes.index, quotes.High, quotes.index, quotes.Low,
                  color="magenta")
    body_colours = (
        ("Increase", "green"),
        ("Decrease", "red"),
        ("Equal", "blue"),
    )
    for status, colour in body_colours:
        selected = quotes.Status == status
        chart.rect(
            quotes.index[selected],
            quotes.Midpoint[selected],
            hours_12,
            quotes.Height[selected],
            fill_color=colour,
            line_color="black",
        )

    script1, div1 = components(chart)
    return render_template(
        "plot.html",
        script1=script1,
        div1=div1,
        cdn_js=CDN.js_files,
        cdn_css=CDN.css_files,
    )
def correlation() -> None:
    """Download several time series and display how they correlate.

    The series are plotted to ``multiple_data.png`` under ``IMGDIR`` and the
    Pearson correlation matrix of their percentage changes is printed.
    """
    quandl.read_key()

    # Download ABN Amro, Banco Santander and Kas Bank, collapsed monthly
    codes = ['EURONEXT/ABN', 'EURONEXT/SANTA', 'EURONEXT/KA']
    prices = quandl.get(
        codes,
        column_index=4,
        collapse='monthly',
        start_date='2016-01-01',
        end_date='2017-12-31',
    )
    overview = (
        ('df.head()', prices.head()),
        ('df.tail()', prices.tail()),
        ('df.describe()', prices.describe()),
    )
    for label, value in overview:
        print(STR_FMT.format(label, value))

    # Chart the raw series
    prices.plot()
    plt.savefig(IMGDIR + 'multiple_data.png', bbox_inches='tight')

    # Pearson correlation of the changes between consecutive observations
    # (the data is collapsed monthly in the request above)
    changes = prices.pct_change()
    corr = changes.corr(method='pearson')
    print(STR_FMT.format('corr', corr))
def sma() -> None:
    """Plot short and long simple moving averages of a time series.

    The ABN Amro series is downloaded from Quandl and the chart is saved as
    ``sma.png`` under ``IMGDIR``.
    """
    quandl.read_key()

    # Download the ABN Amro series
    prices = quandl.get('EURONEXT/ABN', column_index=4)
    for label, frame in (('df.head()', prices.head()),
                         ('df.tail()', prices.tail())):
        print(STR_FMT.format(label, frame))

    # Pad the gaps so the series has one value per calendar day
    last = prices.asfreq('D', method='ffill')['Last']

    # 5-day and 30-day averages, defined only once a window is full
    averages = pd.DataFrame({
        'short': last.rolling(window=5, min_periods=5).mean(),
        'long': last.rolling(window=30, min_periods=30).mean(),
    })

    # Draw both averages on one chart
    averages.plot(figsize=(12, 8))
    plt.savefig(IMGDIR + 'sma.png', bbox_inches='tight')
def get_data(tickers, start_date='2010-07-01', end_date='2019-03-01', lags=5):
    """Download adjusted closes and derive lagged percentage returns.

    Parameters
    ----------
    tickers
        Ticker filter forwarded to the ``WIKI/PRICES`` table query.
        NOTE(review): the lag columns are computed over the returned rows
        in order, so this presumably expects a single symbol - confirm
        against callers.
    start_date, end_date
        Inclusive bounds of the ``date`` filter of the query.
    lags
        Number of lagged return columns (``lag_01`` ... ``lag_NN``) to add.

    Returns
    -------
    DataFrame indexed by date with columns ``adj_close``, ``today``,
    ``lag_00`` (percentage change of ``today``), the lagged returns and
    ``direction`` (1 where ``lag_00`` >= 0, else 0); rows containing NaN
    are dropped.
    """
    quandl.read_key('../data/quandl.key')  # personal key to quandl API

    # Query the requested ticker(s) instead of a hard-coded symbol
    df = quandl.get_table('WIKI/PRICES',
                          ticker=tickers,
                          qopts={'columns': ['date', 'adj_close']},
                          date={'gte': start_date, 'lte': end_date},
                          paginate=True)

    df.date = pd.to_datetime(df.date)
    df['today'] = df['adj_close']
    df['lag_00'] = df['today'].pct_change() * 100

    # Move the dates from a column into the index
    df.set_index(pd.DatetimeIndex(df['date']), inplace=True)
    df.drop('date', axis=1, inplace=True)

    # lag_NN is the percentage return shifted NN rows down
    for lag in range(1, lags + 1):
        column = 'lag_{:02}'.format(lag)
        df[column] = df['today'].shift(lag).pct_change() * 100

    df['direction'] = np.where(df['lag_00'] >= 0, 1, 0)
    return df.dropna()
get_stock_data(None) except AssertionError: test1 = True try: get_stock_data(55) except AssertionError: test2 = True self.assertTrue(test1) self.assertTrue(test2) def test_getting_data(self): """Test that data is returned""" gme = get_stock_data("GME") self.assertFalse(gme.empty) multiple = get_stock_data(["NOK", "GME", "AMC"]) self.assertFalse(multiple.empty) erroneous = get_stock_data(";laksdj;flkasjd;a") self.assertTrue(erroneous.empty) if __name__ == '__main__': # Set quandl api key quandl.read_key(filename=API_FILE) unittest.main()
import quandl import os import pandas as pd import sys #use this line the first time you use quandl. For more info see: https://github.com/quandl/quandl-python # quandl.save_key("supersecret") #this line afterwards quandl.read_key() cot_period = 26 dir = 'data\\' continue_contract_month_base = 2 quandl_cot_futures_map = { "ZN": "043602", "ZS": "005602", "ZM": "026603", "ZL": "007601", "ZC": "002602", "ZW": "001602", "HE": "054642", "LE": "057642", "GC": "088691", "SI": "084691", "HG": "085692", "CL": "067651", "HO": "022651", "RB": "111659", "NG": "023651",
Yahoo has significantly altered its data api, and pandas_datareader has deprecated many of its readers, so I have elected to rewrite the data acquisition portion of this project using the Quandl api instead. ''' import datetime import matplotlib.pyplot as plt import pandas as pd import numpy as np import quandl from sklearn.preprocessing import Normalizer from sklearn.pipeline import make_pipeline from sklearn.cluster import KMeans from sklearn.decomposition import PCA quandl.read_key('../data/quandl.key') # personal key to quandl API company_dict = { 'Amazon': 'AMZN', 'Apple': 'AAPL', 'Walgreen': 'WBA', 'Northrop Grumman': 'NOC', 'Boeing': 'BA', 'Lockheed Martin': 'LMT', 'McDonalds': 'MCD', 'Intel': 'INTC', 'Navistar': 'NAV', 'IBM': 'IBM', 'Texas Instruments': 'TXN', 'MasterCard': 'MA', 'Microsoft': 'MSFT',
def __init__(self, filename):
    """Read the Quandl API key and set the dataset prefix.

    Parameters
    ----------
    filename
        Key file name handed to ``quandl.read_key``.
    """
    quandl.read_key(filename=filename)

    # Prefix stored for later use by the instance
    self.data_stream = "EOD/"