Exemple #1
0
def plot_candlestick() -> None:
    """Draw a July 2018 candlestick chart of ABN Amro from Quandl data.

    Prints the head and tail of the OHLC table and saves the chart as
    ``candlestick.png`` under ``IMGDIR``.
    """
    import matplotlib.dates as mdates
    from mpl_finance import candlestick_ohlc

    quandl.read_key()

    # Download one month of ABN Amro quotes
    quotes = quandl.get('EURONEXT/ABN',
                        start_date='2018-07-01',
                        end_date='2018-07-31')

    # candlestick_ohlc wants matplotlib date numbers in the first column,
    # followed by open, high, low and close ('Last' is used as the close)
    quotes['Date'] = quotes.index.map(mdates.date2num)
    ohlc_columns = ['Date', 'Open', 'High', 'Low', 'Last']
    df_ohlc = quotes[ohlc_columns]
    print(STR_FMT.format('df_ohlc.head()', df_ohlc.head()))
    print(STR_FMT.format('df_ohlc.tail()', df_ohlc.tail()))

    # Draw the candles on a date-formatted x-axis
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    candlestick_ohlc(ax, df_ohlc.values, width=0.8,
                     colorup='green', colordown='red')

    # Write the figure to disk
    fig.subplots_adjust(hspace=0.75)
    fig.savefig(IMGDIR + 'candlestick.png', bbox_inches='tight')
Exemple #2
0
def ema() -> None:
    """Plot 5-day and 30-day exponential moving averages of ABN Amro.

    Prints the head and tail of the downloaded data and saves the chart as
    ``ema.png`` under ``IMGDIR``.
    """
    quandl.read_key()

    # Download the ABN Amro series (column 4 only)
    quotes = quandl.get('EURONEXT/ABN', column_index=4)
    print(STR_FMT.format('df.head()', quotes.head()))
    print(STR_FMT.format('df.tail()', quotes.tail()))

    # Resample to a daily calendar, carrying the previous value forward
    last_prices = quotes.asfreq('D', method='ffill')['Last']

    # ewm() exposes the exponentially weighted window functions; its help
    # text is printed before the two averages are computed
    help(last_prices.ewm)
    short_ema = last_prices.ewm(span=5).mean()
    long_ema = last_prices.ewm(span=30).mean()

    # Put both averages side by side and plot them together
    averages = pd.DataFrame({'short': short_ema, 'long': long_ema},
                            columns=['short', 'long'])
    averages.plot(figsize=(12, 8))
    plt.savefig(IMGDIR + 'ema.png', bbox_inches='tight')
Exemple #3
0
def plot_time_series() -> None:
    """Plot a time series using data from Quandl.

    Two images are written under ``IMGDIR``:

    * ``dataset.png`` -- every column of the ABN Amro dataset.
    * ``time_series.png`` -- the 'Last' price (top panel) above the trading
      volume (bottom panel).

    The head/tail of the dataset and of the extracted series are printed
    along the way.
    """
    quandl.read_key()

    # Get data of ABN Amro
    df = quandl.get('EURONEXT/ABN')
    print(STR_FMT.format('df.head()', df.head()))
    print(STR_FMT.format('df.tail()', df.tail()))
    df.plot()
    plt.savefig(IMGDIR + 'dataset.png', bbox_inches='tight')

    # Extract the daily closing price and volume
    prices = df['Last']
    volumes = df['Volume']
    print(STR_FMT.format('prices.head()', prices.head()))
    print(STR_FMT.format('volumes.tail()', volumes.tail()))
    print(STR_FMT.format('type(volumes)', type(volumes)))

    # Start a fresh figure: df.plot() above leaves its figure current, and
    # recent matplotlib releases no longer remove the axes that
    # subplot2grid overlaps, so the dataset plot would otherwise show
    # through underneath the two panels.
    plt.figure(figsize=(12, 8))

    # Top panel (3 of 4 rows): daily closing price
    top = plt.subplot2grid((4, 4), (0, 0), rowspan=3, colspan=4)
    top.plot(prices.index, prices, label='Last')
    top.set_title('ABN Last Price from {low} - {high}'.format(
        low=prices.index[0].year, high=prices.index[-1].year))
    top.legend(loc=2)

    # Bottom panel (last row): daily trading volume
    bottom = plt.subplot2grid((4, 4), (3, 0), rowspan=1, colspan=4)
    bottom.bar(volumes.index, volumes)
    bottom.set_title('ABN Daily Trading Volume')

    # Save figure
    plt.subplots_adjust(hspace=0.75)
    plt.savefig(IMGDIR + 'time_series.png', bbox_inches='tight')
Exemple #4
0
def plot_correlation() -> None:
    """Plot the rolling correlation of ABN Amro and Banco Santander returns.

    Prints summary statistics along the way and saves the chart as
    ``correlation.png`` under ``IMGDIR``.
    """
    quandl.read_key()

    # Download column 4 ('Last') of both datasets for 2016-2017
    quotes = quandl.get(['EURONEXT/ABN', 'EURONEXT/SANTA'],
                        column_index=4,
                        start_date='2016-01-01',
                        end_date='2017-12-31')
    print(STR_FMT.format('df.head()', quotes.head()))
    print(STR_FMT.format('df.tail()', quotes.tail()))
    print(STR_FMT.format('df.describe()', quotes.describe()))

    # Forward-fill onto a daily calendar, then take day-over-day changes
    daily = quotes.asfreq('D', method='ffill')
    returns = daily.pct_change()
    abn_returns = returns['EURONEXT/ABN - Last']
    santa_returns = returns['EURONEXT/SANTA - Last']

    # The rolling window spans half of the daily observations
    window = len(daily.index) // 2
    print(STR_FMT.format('window', window))
    print(STR_FMT.format('abn_returns.describe()', abn_returns.describe()))
    print(STR_FMT.format('santa_returns.describe()', santa_returns.describe()))

    # Correlate the two return series over the rolling window; only full
    # windows produce a value, the rest are dropped
    abn_windows = abn_returns.rolling(window=window, min_periods=window)
    rolling_corr = abn_windows.corr(other=santa_returns).dropna()
    rolling_corr.plot(figsize=(12, 8))
    plt.savefig(IMGDIR + 'correlation.png', bbox_inches='tight')
Exemple #5
0
def qq_plot() -> None:
    """
    Draw a Q-Q (quantile-quantile) plot of ABN Amro daily changes.

    Notes
    ----------
    A Q-Q plot compares two probability distributions graphically by
    plotting their quantiles against each other. Here the daily changes of
    a stock are compared against a normal distribution. The figure is saved
    as ``qq_plot.png`` under ``IMGDIR``.

    """
    from scipy import stats

    quandl.read_key()

    # Download the series and turn it into day-over-day percentage changes
    quotes = quandl.get('EURONEXT/ABN', column_index=4)
    daily_changes = quotes.pct_change(periods=1).dropna()
    print(STR_FMT.format('daily_changes.describe()', daily_changes.describe()))

    # stats.probplot is a Q-Q plot in which the x-axis shows probabilities
    # in the scale of the theoretical distribution and the y-axis shows the
    # unscaled quantiles of the sample data.
    fig, ax = plt.subplots(figsize=(8, 4))
    help(stats.probplot)
    sample = daily_changes['Last']
    stats.probplot(sample, dist='norm', plot=ax)
    fig.savefig(IMGDIR + 'qq_plot.png', bbox_inches='tight')
Exemple #6
0
def time_series_analytics() -> None:
    """Visualise some statistical properties of a time series.

    Saves four images under ``IMGDIR``: ``pct_change.png``,
    ``cum_return.png``, ``hist.png`` and ``volatility.png``.
    """
    quandl.read_key()

    # Download the ABN Amro series (column 4 only)
    quotes = quandl.get('EURONEXT/ABN', column_index=4)
    print(STR_FMT.format('df.head()', quotes.head()))
    print(STR_FMT.format('df.tail()', quotes.tail()))

    # Percentage change from one observation to the next
    returns = quotes.pct_change(periods=1)
    print(STR_FMT.format('daily_changes.describe()', returns.describe()))
    returns.plot()
    plt.savefig(IMGDIR + 'pct_change.png', bbox_inches='tight')

    # Cumulative return: compound the growth factors, then subtract one.
    # Equivalent to "df / df['Last'][0] - 1"
    growth = returns + 1
    cumulative = growth.cumprod() - 1
    cumulative.plot()
    plt.savefig(IMGDIR + 'cum_return.png', bbox_inches='tight')

    # Distribution of the returns as a 50-bin histogram
    returns.hist(bins=50, figsize=(8, 4))
    plt.savefig(IMGDIR + 'hist.png', bbox_inches='tight')

    # Volatility: 30-day rolling standard deviation of the returns of the
    # forward-filled daily series
    daily = quotes.asfreq('D', method='ffill')
    daily_returns = daily.pct_change()
    volatility = daily_returns.rolling(window=30, min_periods=30).std()
    volatility.plot()
    plt.savefig(IMGDIR + 'volatility.png', bbox_inches='tight')
Exemple #7
0
 def __init__(self, path, table, date):
     """Load a Quandl dataset into ``self.data``.

     Parameters
     ----------
     path
         File from which the Quandl API key is read.
     table
         Quandl dataset code passed to ``quandl.get``.
     date
         Mapping with ``'start'`` and ``'end'`` keys. When falsy, the range
         defaults to 2000-01-01 through today.
     """
     if not date:
         import datetime
         # Format today's date in one step: concatenating the integer
         # year/month/day attributes with '-' raises TypeError.
         date = {
             'start': '2000-01-01',
             'end': datetime.datetime.now().strftime('%Y-%m-%d')
         }
     quandl.read_key(filename=path)
     self.data = quandl.get(table,
                            start_date=date['start'],
                            end_date=date['end'])
Exemple #8
0
def plot():
    """Render ``plot.html`` with a Bokeh candlestick chart of BSE/BOM500209.

    The last 30 days of quotes are fetched through pandas_datareader's
    Quandl reader. The chart's script/div components and the Bokeh CDN
    asset lists are handed to the template.
    """
    from pandas_datareader import data
    import datetime
    import dateutil.relativedelta
    import quandl
    from bokeh.plotting import figure, show, output_file
    from bokeh.embed import components
    from bokeh.resources import CDN

    # Query window: the 30 days up to now
    start = datetime.datetime.now() - dateutil.relativedelta.relativedelta(days=30)
    end = datetime.datetime.now()
    quandl.read_key()
    infy_stock = data.DataReader('BSE/BOM500209',
                                 'quandl',
                                 api_key=quandl.ApiConfig.api_key,
                                 start=start,
                                 end=end)

    def _status(close, open_):
        # Label a row by how its close compares with its open
        if close > open_:
            return "Increase"
        if close < open_:
            return "Decrease"
        return "Equal"

    # Per-row direction plus the centre and height of each candle body
    infy_stock["Status"] = [_status(c, o)
                            for c, o in zip(infy_stock.Close, infy_stock.Open)]
    infy_stock["Midpoint"] = (infy_stock.Open + infy_stock.Close) / 2
    infy_stock["Height"] = abs(infy_stock.Open - infy_stock.Close)

    plot_i = figure(x_axis_type='datetime',
                    width=800,
                    height=300,
                    sizing_mode="scale_width")
    plot_i.title.text = "Candlestick Chart of Infosys"
    plot_i.grid.grid_line_alpha = 0.5

    # Candle body width: twelve hours expressed in milliseconds
    hours_12 = 12 * 60 * 60 * 1000

    # Wicks: one high-to-low segment per row
    plot_i.segment(infy_stock.index, infy_stock.High,
                   infy_stock.index, infy_stock.Low,
                   color="magenta")

    # Bodies: one batch of rectangles per direction, each in its own colour
    body_colours = (("Increase", "green"),
                    ("Decrease", "red"),
                    ("Equal", "blue"))
    for status, colour in body_colours:
        selected = infy_stock.Status == status
        plot_i.rect(infy_stock.index[selected],
                    infy_stock.Midpoint[selected],
                    hours_12,
                    infy_stock.Height[selected],
                    fill_color=colour,
                    line_color="black")

    script1, div1 = components(plot_i)
    cdn_js = CDN.js_files
    cdn_css = CDN.css_files

    return render_template("plot.html",
                           script1=script1,
                           div1=div1,
                           cdn_js=cdn_js,
                           cdn_css=cdn_css)
Exemple #9
0
def correlation() -> None:
    """Download several time series and display their correlation.

    Saves a plot of the raw series as ``multiple_data.png`` under
    ``IMGDIR`` and prints the Pearson correlation matrix of their
    period-over-period changes.
    """
    quandl.read_key()

    # Monthly-collapsed column 4 of ABN, SANTA and KA for 2016-2017
    datasets = ['EURONEXT/ABN', 'EURONEXT/SANTA', 'EURONEXT/KA']
    quotes = quandl.get(datasets,
                        column_index=4,
                        collapse='monthly',
                        start_date='2016-01-01',
                        end_date='2017-12-31')
    print(STR_FMT.format('df.head()', quotes.head()))
    print(STR_FMT.format('df.tail()', quotes.tail()))
    print(STR_FMT.format('df.describe()', quotes.describe()))

    # Plot the raw series together
    quotes.plot()
    plt.savefig(IMGDIR + 'multiple_data.png', bbox_inches='tight')

    # Pearson correlation of the changes between consecutive observations
    # (monthly here, because the data was collapsed to monthly)
    changes = quotes.pct_change()
    corr = changes.corr(method='pearson')
    print(STR_FMT.format('corr', corr))
Exemple #10
0
def sma() -> None:
    """Plot 5-day and 30-day simple moving averages of ABN Amro.

    Prints the head and tail of the downloaded data and saves the chart as
    ``sma.png`` under ``IMGDIR``.
    """
    quandl.read_key()

    # Download the ABN Amro series (column 4 only)
    quotes = quandl.get('EURONEXT/ABN', column_index=4)
    print(STR_FMT.format('df.head()', quotes.head()))
    print(STR_FMT.format('df.tail()', quotes.tail()))

    # Resample to a daily calendar, carrying the previous value forward
    last_prices = quotes.asfreq('D', method='ffill')['Last']

    # Rolling means; a value is produced only once the window is full
    short_sma = last_prices.rolling(window=5, min_periods=5).mean()
    long_sma = last_prices.rolling(window=30, min_periods=30).mean()

    # Put both averages side by side and plot them together
    averages = pd.DataFrame({'short': short_sma, 'long': long_sma},
                            columns=['short', 'long'])
    averages.plot(figsize=(12, 8))
    plt.savefig(IMGDIR + 'sma.png', bbox_inches='tight')
Exemple #11
0
def get_data(tickers, start_date='2010-07-01', end_date='2019-03-01', lags=5):
    """Download adjusted closes from Quandl and build lagged-return features.

    Parameters
    ----------
    tickers
        Ticker filter forwarded to the ``WIKI/PRICES`` table query. The lag
        features are computed over the rows in the order returned, so they
        are only meaningful for a single ticker's series.
    start_date, end_date
        Inclusive date bounds of the query (``gte`` / ``lte``).
    lags
        Number of lagged percentage-return columns to add.

    Returns
    -------
    pandas.DataFrame
        Indexed by date, with columns ``adj_close``, ``today``, ``lag_00``
        (percentage change of ``today``), ``lag_01`` .. ``lag_<lags>``
        (the same change shifted back by 1..lags rows) and ``direction``
        (1 where ``lag_00`` >= 0, else 0). Rows containing NaN are dropped.
    """
    quandl.read_key('../data/quandl.key')  # personal key to quandl API

    # Query the requested ticker(s); the original hard-coded 'AAPL' here and
    # silently ignored the ``tickers`` argument.
    df = quandl.get_table('WIKI/PRICES',
                          ticker=tickers,
                          qopts={'columns': ['date', 'adj_close']},
                          date={
                              'gte': start_date,
                              'lte': end_date
                          },
                          paginate=True)
    df.date = pd.to_datetime(df.date)
    df['today'] = df['adj_close']
    df['lag_00'] = df['today'].pct_change() * 100

    # Move the date column into the index
    df.set_index(pd.DatetimeIndex(df['date']), inplace=True)
    df.drop('date', axis=1, inplace=True)

    # lag_01 .. lag_<lags>: percentage change of the price shifted back by
    # 1..lags rows
    for lag in range(1, lags + 1):
        column = 'lag_{:02}'.format(lag)
        df[column] = df['today'].shift(lag).pct_change() * 100
    df['direction'] = np.where(df['lag_00'] >= 0, 1, 0)

    # The first rows lack enough history for every lag and hold NaN
    df = df.dropna()
    return df
Exemple #12
0
            get_stock_data(None)
        except AssertionError:
            test1 = True

        try:
            get_stock_data(55)
        except AssertionError:
            test2 = True

        self.assertTrue(test1)
        self.assertTrue(test2)

    def test_getting_data(self):
        """Check that valid tickers return data and a junk ticker does not."""

        # A single ticker yields a non-empty result
        self.assertFalse(get_stock_data("GME").empty)

        # A list of tickers yields a non-empty result
        self.assertFalse(get_stock_data(["NOK", "GME", "AMC"]).empty)

        # A nonsense ticker yields an empty result
        self.assertTrue(get_stock_data(";laksdj;flkasjd;a").empty)


if __name__ == '__main__':
    # Load the Quandl API key from API_FILE before any test runs
    quandl.read_key(filename=API_FILE)

    # Run the test cases when this file is executed as a script
    unittest.main()
import quandl
import os
import pandas as pd
import sys

# First-time setup: run the commented-out line below once to save your
# Quandl API key. For more info see: https://github.com/quandl/quandl-python
# quandl.save_key("supersecret")

# On every later run, load the previously saved key.
quandl.read_key()

# NOTE(review): presumably a look-back length for the COT data - confirm the
# unit against where it is used.
cot_period = 26
# NOTE(review): this name shadows the built-in dir(); the value is a relative
# path ending in a backslash separator.
dir = 'data\\'
# NOTE(review): meaning not evident from this part of the file - confirm.
continue_contract_month_base = 2

quandl_cot_futures_map = {
    "ZN": "043602",
    "ZS": "005602",
    "ZM": "026603",
    "ZL": "007601",
    "ZC": "002602",
    "ZW": "001602",
    "HE": "054642",
    "LE": "057642",
    "GC": "088691",
    "SI": "084691",
    "HG": "085692",
    "CL": "067651",
    "HO": "022651",
    "RB": "111659",
    "NG": "023651",
Exemple #14
0
Yahoo has significantly altered its data api, and pandas_datareader has deprecated many
of its readers, so I have elected to rewrite the data acquisition portion of this project
using the Quandl api instead.
'''
import datetime

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import quandl
from sklearn.preprocessing import Normalizer
from sklearn.pipeline import make_pipeline
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# Load the personal Quandl API key from a key file given by relative path.
quandl.read_key('../data/quandl.key')

company_dict = {
    'Amazon': 'AMZN',
    'Apple': 'AAPL',
    'Walgreen': 'WBA',
    'Northrop Grumman': 'NOC',
    'Boeing': 'BA',
    'Lockheed Martin': 'LMT',
    'McDonalds': 'MCD',
    'Intel': 'INTC',
    'Navistar': 'NAV',  
    'IBM': 'IBM',
    'Texas Instruments': 'TXN',
    'MasterCard': 'MA',
    'Microsoft': 'MSFT',
Exemple #15
0
 def __init__(self, filename):
     quandl.read_key(filename=filename)
     self.data_stream = "EOD/"