Example #1
from pandas_datareader import data
import pymc3 as pm
import matplotlib.pyplot as plt
import numpy as np


def main():

    # load daily S&P 500 returns
    returns = data.get_data_google('SPY', start='2008-5-1', end='2009-12-1')['Close'].pct_change()
    returns.plot()
    plt.ylabel('daily returns in %')
    
    with pm.Model() as sp500_model:

        # priors on the Student-t degrees of freedom and the volatility step size
        nu = pm.Exponential('nu', 1./10, testval=5.0)
        sigma = pm.Exponential('sigma', 1./0.02, testval=0.1)

        # latent log-volatility follows a Gaussian random walk
        s = pm.GaussianRandomWalk('s', sigma**-2, shape=len(returns))
        # returns are Student-t distributed with precision exp(-2s)
        r = pm.StudentT('r', nu, lam=pm.math.exp(-2*s), observed=returns)
        
    
    with sp500_model:
        trace = pm.sample(2000)

    pm.traceplot(trace, [nu, sigma])
    plt.show()
    
    plt.figure()
    returns.plot()
    plt.plot(returns.index, np.exp(trace['s',::5].T), 'r', alpha=.03)
    plt.legend(['S&P500', 'stochastic volatility process'])
    plt.show()
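A note on the parameterisation above: lam is the Student-t precision, lam = exp(-2*s), so exp(s) is the latent daily volatility. A minimal sketch of recovering it from the sampled trace (assumes trace and numpy from the example above):

vol_draws = np.exp(trace['s', ::5].T)  # one volatility path per thinned draw
print(vol_draws.shape)                 # (len(returns), number of kept draws)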
Example #2
    def test_dtypes(self):
        # GH3995, GH8980
        data = web.get_data_google('F', start='JAN-01-10', end='JAN-27-13')
        assert np.issubdtype(data.Open.dtype, np.number)
        assert np.issubdtype(data.Close.dtype, np.number)
        assert np.issubdtype(data.Low.dtype, np.number)
        assert np.issubdtype(data.High.dtype, np.number)
        assert np.issubdtype(data.Volume.dtype, np.number)
Example #3
    def test_get_multi1(self):
        for locale in self.locales:
            sl = ['AAPL', 'AMZN', 'GOOG']
            with tm.set_locale(locale):
                pan = web.get_data_google(sl, '2012')
            if (hasattr(pan, 'Close') and hasattr(pan.Close, 'GOOG') and
                    hasattr(pan.Close, 'AAPL')):
                # index GOOG's dates by days on which AAPL closed below GOOG;
                # computed inside the guard so missing columns hit the else branch
                ts = pan.Close.GOOG.index[pan.Close.AAPL < pan.Close.GOOG]
                self.assertEqual(ts[0].dayofyear, 3)
            else:
                self.assertRaises(AttributeError, lambda: pan.Close)

    def test_get_multi2(self):
        with warnings.catch_warnings(record=True) as w:
            for locale in self.locales:
                with tm.set_locale(locale):
                    pan = web.get_data_google(['GE', 'MSFT', 'INTC'],
                                              'JAN-01-12', 'JAN-31-12')
                result = pan.Close.ix['01-18-12']
                assert_n_failed_equals_n_null_columns(w, result)

                # sanity checking

                assert np.issubdtype(result.dtype, np.floating)
                result = pan.Open.ix['Jan-15-12':'Jan-20-12']
                self.assertEqual((4, 3), result.shape)
                assert_n_failed_equals_n_null_columns(w, result)
Example #6
    def updateAxes(self):

        # clear figure
        self.figure.clf()
        self.figure.canvas.draw()

        # get checked items of ticker_widget as a list, return if none
        z = self.ticker_widget.items_model
        contents = [z.item(x).text() for x in range(z.rowCount())
                    if z.item(x).checkState() == 2]  # 2 == Qt.Checked
        if not contents:
            return

        # get data with pandas_datareader; bail out quietly on failure
        try:
            data = get_data_google(contents, self.start_edit.text(), self.end_edit.text())
        except Exception:
            return

        # plot every selected symbol
        for symbol in contents:
            data_tmp = data.minor_xs(symbol)['Close']
            ax = self.figure.add_subplot(111)
            x = data_tmp.index

            # scale by the first closing price if scale_box is checked, set label accordingly
            if self.scale_box.checkState() == 0:  # 0 == Qt.Unchecked
                label = symbol
                yLabel = '$'
                y = data_tmp
            else:
                label = symbol + ', %'
                yLabel = ''
                y = 100 * data_tmp / data_tmp[0]
            ax.plot(x, y, self.linesty, ms=self.ms, lw=self.lw, label=label)

        # clean up plot
        ax.set_title('Daily Closing Price')
        ax.legend(loc='upper left', bbox_to_anchor=(1, 1), numpoints=3)
        ax.grid(True)
        ax.set_ylabel(yLabel)
        self.figure.tight_layout()
        self.figure.subplots_adjust(right=0.75)
        self.figure.canvas.draw()
Example #7
def historical_data(ticker, components): 
	# Prices adjusted for splits
	today, dates = current_date(), []
	for each in components: 
		if PATTERNS['valid_date'].match(each):
			dates.append(each)
	if not dates: 
		return {"message": Response.missing_dates(ticker)}
	# Validate dates
	for each in dates: 
		if each > today: 
			return {"message": Response.invalid_date(each)}
		try: 
			date = datetime.datetime.strptime(each, '%Y-%m-%d')
		except ValueError: 
			return {"message": Response.invalid_date(each)}
	# Validate ticker and fetch data
	try: 
		quotes = data.get_data_google(ticker)
	except Exception: 
		return {"message": Response.data_notfound(ticker)}

	# Return price data for one day
	if len(dates)==1: 
		date = dates[0]
		try: 
			quote = quotes.loc[date]
		except KeyError: 
			return {"message": Response.no_data_for_date(date)}
		return {"message": Response.historical_price(
			ticker, date, quote['Open'], quote['High'], quote['Low'], quote['Close'], int(quote['Volume']))}

	# If two dates are entered, return the high/low range over the given period
	elif len(dates)==2: 
		dates = sorted(dates)
		start, end = dates[0], dates[1]
		quotes = quotes.loc[start:end]
		high = round(quotes['High'].max(),2)
		low = round(quotes['Low'].min(),2)
		return {"message": Response.historical_range(ticker, start, end, high, low)}

	else: 
		return {"message": Response.too_many_dates(ticker)}
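PATTERNS and Response are external to this snippet; a hypothetical stand-in for the date pattern it relies on (the actual regex in the source is not shown here):

import re

# hypothetical: matches the YYYY-MM-DD strings validated above
PATTERNS = {'valid_date': re.compile(r'^\d{4}-\d{2}-\d{2}$')}
print(bool(PATTERNS['valid_date'].match('2017-06-06')))  # True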
Example #8
import datetime

from dateutil.relativedelta import relativedelta
import pandas_datareader.data as web


def create_series(ticker):
    # one year of daily percentage returns
    time_ago = datetime.datetime.today().date() - relativedelta(months=12)
    ticker_data = web.get_data_google(ticker,
                                      time_ago)['Close'].pct_change().dropna()
    ticker_data_len = len(ticker_data)
    x = [0] * 17
    y = [0] * 17
    x_test = [0] * 17
    y_test = [0] * 17
    a = 0
    b = 6
    # training windows: six consecutive returns as features, the next return as target
    for i in range(0, 17):
        x[i] = ticker_data[a:b]
        y[i] = ticker_data[b]
        a = b
        b = a + 6
    # test windows continue where the training windows ended
    for i in range(0, 17):
        x_test[i] = ticker_data[a:b]
        y_test[i] = ticker_data[b]
        a = b
        b = a + 6
    return y, x, y_test, x_test
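The loops above implement a simple sliding window: each x[i] holds six consecutive daily returns and each y[i] is the return that immediately follows, with the test windows continuing where the training windows stop. A tiny sketch of one step on synthetic data (illustrative only):

import numpy as np

series = np.arange(10) / 100.0   # stand-in for ticker_data
a, b = 0, 6
x0, y0 = series[a:b], series[b]  # first feature window and its target
print(x0, y0)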
Example #9
def get_symbol_returns_from_yahoo(symbol, start=None, end=None):
    # NOTE: fetching from Yahoo or Google may fail; both have changed their APIs
    """
    Wrapper for pandas.io.data.get_data_yahoo().
    Retrieves prices for symbol from yahoo and computes returns
    based on adjusted closing prices.

    Parameters
    ----------
    symbol : str
        Symbol name to load, e.g. 'SPY'
    start : pandas.Timestamp compatible, optional
        Start date of time period to retrieve
    end : pandas.Timestamp compatible, optional
        End date of time period to retrieve

    Returns
    -------
    pandas.DataFrame
        Returns of symbol in requested period.
    """

    try:
        px = web.get_data_yahoo(symbol, start=start, end=end)
        px['date'] = pd.to_datetime(px['date'])
        px.set_index('date', drop=False, inplace=True)
        rets = px[['adjclose']].pct_change().dropna()
    except Exception as e:
        warnings.warn(
            'Yahoo Finance read failed: {}, falling back to Google'.format(e),
            UserWarning)
        px = web.get_data_google(symbol, start=start, end=end)
        rets = px[['Close']].pct_change().dropna()

    rets.index = rets.index.tz_localize("UTC")
    rets.columns = [symbol]
    return rets
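A minimal usage sketch for the wrapper above (the symbol and dates are illustrative, and network access plus a working endpoint are assumed):

import pandas as pd

rets = get_symbol_returns_from_yahoo('SPY',
                                     start=pd.Timestamp('2014-01-01'),
                                     end=pd.Timestamp('2014-12-31'))
print(rets.head())  # a single 'SPY' column of daily returns, UTC-localized index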
Example #10
    def test_get_goog_volume(self):
        for locale in self.locales:
            with tm.set_locale(locale):
                df = web.get_data_google('GOOG').sort_index()
            self.assertEqual(df.Volume.ix['OCT-08-2010'], 2863473)
Example #11
    def test_unicode_date(self):
        # GH8967
        data = web.get_data_google('F', start='JAN-01-10', end='JAN-27-13')
        self.assertEqual(data.index.name, 'Date')
    def test_bad_retry_count(self):

        with pytest.raises(ValueError):
            web.get_data_google('F', retry_count=-1)
Example #13
    def test_get_goog_volume(self):
        for locale in self.locales:
            with tm.set_locale(locale):
                df = web.get_data_google('GOOG').sort_index()
            self.assertEqual(df.Volume.ix['JAN-02-2015'], 1446662)

    def test_bad_retry_count(self):
        with tm.assertRaises(ValueError):
            web.get_data_google('F', retry_count=-1)

    def test_get_multi_all_invalid(self):
        with warnings.catch_warnings(record=True):
            sl = ['INVALID', 'INVALID2', 'INVALID3']
            with pytest.raises(RemoteDataError):
                web.get_data_google(sl, '2012')
# -*- coding: utf-8 -*-
"""
  Name     : c4_03_get_stock_data_google.py
  Book     : Python for Finance (2nd ed.)
  Publisher: Packt Publishing Ltd. 
  Author   : Yuxing Yan
  Date     : 6/6/2017
  email    : [email protected]
             [email protected]
"""
import pandas_datareader.data as getData
df = getData.get_data_google("IBM")
print(df.head())
Example #18
from pandas_datareader import data
import pymc3 as pm
import matplotlib.pyplot as plt
import numpy as np

returns = data.get_data_google('SPY', start='2008-5-1',
                               end='2009-12-1')['Close'].pct_change()
print(returns)

with pm.Model() as sp500_model:
    nu = pm.Exponential('nu', 1. / 10, testval=5.)
    sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
    s = pm.GaussianRandomWalk('s', sigma**-2, shape=len(returns))
    volatility_process = pm.Deterministic('volatility_process',
                                          pm.math.exp(-2 * s))
    r = pm.StudentT('r', nu, lam=volatility_process, observed=returns)

with sp500_model:
    trace = pm.sample(2000)
pm.traceplot(trace, [nu, sigma])

fig, ax = plt.subplots(figsize=(15, 8))
returns.plot(ax=ax)
ax.plot(returns.index, 1 / np.exp(trace['s', ::5].T), 'r', alpha=.03)
ax.set(title='volatility_process', xlabel='time', ylabel='volatility')
ax.legend(['S&P500', 'stochastic volatility process'])
plt.show()

import pandas as pd
# compatibility shim: some pandas_datareader releases look up is_list_like
# under pd.core.common, which newer pandas removed
pd.core.common.is_list_like = pd.api.types.is_list_like
import pandas_datareader.data as dataRetrieve
import datetime


beg_date = datetime.datetime(2012, 1, 5)
end_date = datetime.datetime(2012, 12, 1)

df = dataRetrieve.get_data_google('AAPL', beg_date, end_date) # always worth trying both yahoo and google in case one fails

print(df.head())
Example #20
import datetime as dt
import sys

import numpy as np
import pandas as pd
import pandas_datareader.data as web
import matplotlib.pyplot as plt
from arch import arch_model

start = dt.datetime(2000, 1, 1)
end = dt.datetime(2017, 1, 1)
sp500 = web.get_data_google('SPY', start=start, end=end)
returns = 100 * sp500['Close'].pct_change().dropna()
returns.plot()
plt.show()

model = arch_model(returns, vol='Garch', p=1, o=0, q=1, dist='Normal')
results = model.fit()
print(results.summary())

forecasts = results.forecast(horizon=30, method='simulation')
sims = forecasts.simulations

lines = plt.plot(sims.values[-1, ::30].T, alpha=0.33)
lines[0].set_label('Simulated paths')
plt.show()

# distribution of the simulated day-30 returns across all paths
# (sims.values has shape (num_forecasts, simulations, horizon))
print(np.percentile(sims.values[-1, :, -1], 5))
plt.hist(sims.values[-1, :, -1], bins=50)
plt.title('Distribution of Returns')
plt.show()
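For comparison, the same fit can produce a variance forecast without simulation; a minimal sketch assuming the results object above (arch's default analytic method applies to this GARCH(1,1)):

analytic = results.forecast(horizon=30)
print(analytic.variance.iloc[-1])  # forecast variance for horizons 1..30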
Example #21
                features_indiv[0, 6] += 1
            i += 1
        # compute cosine distances to decide which group this text belongs to
        dist1 = pdist(np.vstack([features_indiv, proArray]), 'cosine')
        dist2 = pdist(np.vstack([features_indiv, nproArray]), 'cosine')
        if dist1 > dist2:
            Polarity[t, 0] += Polarity_init * 1.3
        else:
            Polarity[t, 0] += Polarity_init
        k += 1
    Polarity[t, 0] = Polarity[t, 0] / vol
    t += 1

## fetch the company's stock data and plot the related line charts
# fetch Walmart stock data
WMT = dt.get_data_google('wmt', start='2015-12-31', end='2018-01-01')
print(WMT.head())
WMT.to_csv('walmart.csv')
# plot the line charts
style.use('ggplot')
plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly
df = pd.read_csv('walmart.csv', index_col='Date', parse_dates=True)
df['H-L'] = df.High - df.Low
ma = df.Close.rolling(10).mean()  # pd.rolling_mean was removed from pandas
# line chart of the stock price data
plt.plot(df.Close, label='Walmart')
plt.plot(ma, label='10MA')
plt.legend()
plt.xlabel('date')
plt.ylabel('price')
Example #22
    def _get_nasdaq_csv(self, ticker):
        return data.get_data_google(ticker, start='1996-05-06')
Example #23
def downloadQuotes(tickers, date1=None, date2=None, adjust=True, Verbose=False):
    """
    Given a ticker sequence, return historical Yahoo! quotes as a pandas DataFrame.

    Parameters
    ----------
    tickers : sequence
        A sequence (such as a list) of string tickers. For example:
        ['aapl', 'msft']
    date1 : {datetime.date, tuple}, optional
        The first date to grab historical quotes on. For example:
        datetime.date(2010, 1, 1) or (2010, 1, 1). By default the first
        date is (1900, 1, 1).
    date2 : {datetime.date, tuple}, optional
        The last date to grab historical quotes on. For example:
        datetime.date(2010, 12, 31) or (2010, 12, 31). By default the last
        date is 10 days beyond today's date.
    adjust : bool, optional
        Adjust (default) the open, close, high, and low prices. The
        adjustment takes splits and dividends into account such that the
        corresponding returns are correct. Volume is already split adjusted
        by Yahoo so it is not changed by the value of `adjust`.
    Verbose : bool, optional
        Print the ticker currently being loaded. By default the tickers are
        not printed.

    Returns
    -------
    quotes_df : DataFrame
        A pandas dataframe is returned. In order, the  axes contain: dates,
        quotes (adjusted close). The elements along the item axis depend on the value
        of `adjust`. When `adjust` is False, the items are

        ['open', 'close', 'high', 'low', 'volume', 'adjclose']

        When adjust is true (default), the adjusted close ('adjclose') is
        not included. The dates are datetime.date objects.

    Examples
    --------
    items = ['Adj Close']
    date1 = '2012-01-01'
    date2 = datetime.date.today()
    ticker = 'GOOGL'
    data = get_data_yahoo(ticker, start = date1, end = date2)[items]
    dates = data.index
    data.columns = [ticker]

    ticker = 'AMZN'
    data2 = get_data_yahoo(ticker, start = date1, end = date2)[items]
    dates2 = data2.index
    data2.columns = [ticker]

    data = data.join(data2, how='outer')
    data.sort_index( axis=0, inplace=True )

    data.tail()

                 GOOGL    AMZN
    Date
    2014-04-07  540.63  317.76
    2014-04-08  557.51  327.07
    2014-04-09  567.04  331.81
    2014-04-10  546.69  317.11
    2014-04-11  537.76  311.73


    """

    import datetime
    from time import sleep
    # matplotlib.finance (removed in matplotlib 2.x) was star-imported here but is unused
    #from la.external.matplotlib import quotes_historical_yahoo
    import pandas as pd
    from pandas_datareader.data import DataReader, get_data_yahoo, get_data_google
    ##from pandas.io.data import DataReader
    ##from pandas.io.data import get_data_yahoo, get_data_google
    #import la

    if date1 is None:
        date1 = datetime.date(1900, 1, 1)
    if date2 is None:
        date2 = datetime.date.today() + datetime.timedelta(+10)
    #quotes_df = None
    #lar = None
    items = ['Adj Close','volume']
    items_to_drop = ['Open', 'High', 'Low', 'Close']
    google_items = ['Close']
    if Verbose:
        print "Load data"

    i=0
    number_tries = 0
    re_tries = 0
    for itick, ticker in enumerate(tickers):
        if Verbose:
            print "\t" + ticker + "  ",

        data = []
        dates = []

        #number_tries = 0
        ##try:
        # read in dataframe containing adjusted close quotes for a ticker in the list
        #print "number_tries = ", number_tries
        if number_tries < 11:
            #print "number_tries = ", number_tries, " trying with yahoo"
            '''
            try:
                data = get_data_yahoo(ticker, start = date1, end = date2)[items,:]
                number_tries = 0
            except:
                pass
            '''
            print "items = ", items
            data = get_data_yahoo(ticker, start = date1, end = date2)
            print "data = ", data
            print "type(data) = ", type(data)
            data = data['Adj Close']
            #data = data.drop(items_to_drop)
        else:
            #print "number_tries = ", number_tries, " trying with google"
            print("   ...retrieving quotes using google")
            try:
                data = get_data_google(ticker, start=date1, end=date2)[google_items]
                number_tries = 0
            except Exception:
                pass
        #print ' data = ', data
        dates = data.index
        #print ' dates = ', dates
        dates = [d.to_pydatetime() for d in dates]
        
        data.columns = [ticker]
        #print ' ticker = ', [ticker]
        #print ' data.columns = ', data.columns
        if Verbose:
            print(i, " of ", len(tickers), " ticker ", ticker, " has ", data.shape[0], " quotes")

        if itick-re_tries == 0:
            #print " creating dataframe..."
            quotes_df = data
        else:
            #print " joining to dataframe..."
            quotes_df = quotes_df.join( data, how='outer' )
            #print " joined to dataframe..."
        i += 1
        '''
        except:
            print "could not get quotes for ", ticker, "         will try again and again.", number_tries
            sleep(3)
            number_tries += 1
            re_tries += 1
            if number_tries < 20:
                tickers[itick+1:itick+1] = [ticker]
        '''

    print "number of tickers successfully processed = ", i
    if i > 0 :
        quotes_df.sort_index( axis=0, inplace=True )
        return quotes_df

    else:
        # return empty DataFrame
        quotes_df = pd.DataFrame([0, 0], ['Dates', date2])
        quotes_df.columns = ['None']

    return quotes_df
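A minimal usage sketch for downloadQuotes (tickers and dates are illustrative; network access and working Yahoo!/Google endpoints are assumed):

import datetime

quotes = downloadQuotes(['aapl', 'msft'],
                        date1=datetime.date(2013, 1, 1),
                        date2=datetime.date(2013, 12, 31),
                        Verbose=True)
print(quotes.tail())  # one adjusted-close column per ticker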
Example #24
import pandas as pd
from datetime import datetime
from pandas_datareader import data

start = datetime(2017, 1, 1)
end = datetime(2017, 4, 30)

df = data.get_data_google("KRX:KOSPI", start, end)
print(df.head())
Example #25
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas_datareader import data as dr
import datetime
import seaborn

end_date = datetime.datetime.now()
start_date = end_date - datetime.timedelta(days=365)

data = dr.get_data_google('AMD', start=start_date, end=end_date)

print(data.head())
from pandas_datareader import data
import pandas as pd
import numpy as np
import talib as ta
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.gridspec as gridspec
from matplotlib.dates import date2num
from matplotlib.finance import candlestick_ochl as candlestick  # data below is built in O-C-H-L order
import datetime

ticker = 'OPK'

# Download sample data
sec_id = data.get_data_google(ticker, '2014-06-01')

# Data for matplotlib finance plot
sec_id_ochl = np.array(
    pd.DataFrame({
        '0': date2num(sec_id.index.to_pydatetime()),
        '1': sec_id.Open,
        '2': sec_id.Close,
        '3': sec_id.High,
        '4': sec_id.Low
    }))

# Technical Analysis
SMA_FAST = 50
SMA_SLOW = 200
RSI_PERIOD = 14
RSI_AVG_PERIOD = 15
    def test_get_multi_invalid(self):
        with warnings.catch_warnings(record=True):
            sl = ['AAPL', 'AMZN', 'INVALID']
            pan = web.get_data_google(sl, '2012')
            assert 'INVALID' in pan.minor_axis
Example #28
df.sum()
df.sum(axis=1)
df.mean(axis=1,skipna=False)
df.mean(axis=1)
df.idxmax()
df.cumsum()
df.cumsum(axis=1)
df.describe()
from pandas import Series, DataFrame

obj = Series(['a', 'a', 'b', 'c'] * 4)
obj.describe()
obj

from pandas_datareader import data
all_data = {}
for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']:
    all_data[ticker] = data.get_data_google(ticker, '1/1/2000', '1/1/2010')
# Google quotes carry no 'Adj Close' column, so use 'Close'
price = DataFrame({tic: d['Close'] for tic, d in all_data.items()})
volume = DataFrame({tic: d['Volume'] for tic, d in all_data.items()})
returns = price.pct_change()
returns.tail()
returns.MSFT.corr(returns.IBM)
returns.MSFT.cov(returns.IBM)
returns.corr()
    def test_unicode_date(self):
        # see gh-8967
        data = web.get_data_google('F', start='JAN-01-10', end='JAN-27-13')
        assert data.index.name == 'Date'
Example #32
"""
Created on Mon Jun 12 16:07:55 2017

@author: Varun Divakar
"""

from pandas_datareader import data as web
import numpy as np
import pandas as pd
from sklearn import mixture as mix
import seaborn as sns
import matplotlib.pyplot as plt
import talib as ta
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

df = web.get_data_google('SPY', start='2015-01-01', end='2016-08-01')
df = df[['Open', 'High', 'Low', 'Close']]

n = 10
t = 0.8
split = int(t * len(df))

df['high'] = df['High'].shift(1)
df['low'] = df['Low'].shift(1)
df['close'] = df['Close'].shift(1)
df['RSI'] = ta.RSI(np.array(df['close']), timeperiod=n)
df['SMA'] = df['close'].rolling(window=n).mean()
df['Corr'] = df['SMA'].rolling(window=n).corr(df['close'])
df['SAR'] = ta.SAR(np.array(df['high']), np.array(df['low']), 0.2, 0.2)
df['ADX'] = ta.ADX(np.array(df['high']), np.array(df['low']),
                   np.array(df['close']), timeperiod=n)
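The shifted high/low/close columns ensure each day's indicators use only prior-day data, and split marks the train/test boundary; a sketch of how that boundary would be applied (hypothetical continuation, since the source is truncated here):

train = df.iloc[:split]   # first 80% of rows for fitting
test = df.iloc[split:]    # remaining 20% held out
print(len(train), len(test))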
Example #33
    def test_get_multi_invalid(self):
        sl = ['AAPL', 'AMZN', 'INVALID']
        pan = web.get_data_google(sl, '2012')
        self.assertIn('INVALID', pan.minor_axis)
Example #34
    def _get(cls, symbol, date_from, date_to):
        # FIXME: temporary fix
        from pandas_datareader.google.daily import GoogleDailyReader

        GoogleDailyReader.url = 'http://finance.google.com/finance/historical'
        return get_data_google(symbol, date_from, date_to)
	def _mine_from_url(self, url):
		html = BeautifulSoup(requests.get(url, headers=self.HEADERS).text, "html5lib")
		self.logger.info('searching for table with class - {}'.format(self.PRICING_TABLE_CLASS))
		table = html.find('div', {'class': 'genTable'})
		if not table or not table.text or not table.tbody or not table.tbody.text or not table.tbody.find_all('tr'):
			self.logger.warn('did not find anything for url - {}; skipping'.format(url))
			return []

		entries = []

		rows = table.tbody.find_all('tr')
		for i, row in enumerate(rows):
			columns = row.find_all('td')
			if not columns:
				self.logger.warn('skipping entry #{} - no columns found'.format(i))
				continue

			company = columns[0].a.text
			company_url = columns[0].a.attrs['href']
			symbol = columns[1].a.text
			market = columns[2].text
			price = columns[3].text
			shares = columns[4].text
			amount = columns[5].text
			date = columns[6].text
			price_num = float(re.sub(r'[^\d\.]+', '', price))
			self.logger.info('Mining entry #{} for company - {}'.format(i, company))
			try:
				ipo_data = self._mine_company_url(company_url)
			except Exception as e:
				self.logger.exception('failed fetching company data for - #{} - {}'.format(i, company))
				ipo_data = {}

			try:
				end_date = dateutil.parser.parse(date) + timedelta(days=1)
				trade_data = web.get_data_google(symbol, date, end_date.strftime('%m/%d/%Y'))
				first_day_open = trade_data.ix[0].Open
				first_day_close = trade_data.ix[0].Close
				first_day_change = float((first_day_close - first_day_open) / first_day_open * 100) if trade_data is not None else None
				first_day_ipo_change = float((first_day_close - price_num) / price_num * 100) if trade_data is not None else None
				first_day_positive = first_day_change > 0 if first_day_change is not None else None
				trade_data = json.loads(trade_data.to_json())
			except Exception as e:
				self.logger.exception('failed fetching finance data for - #{} - {}'.format(i, company))
				trade_data = {}
				first_day_change = None
				first_day_positive = None
				first_day_open = None
				first_day_close = None
				first_day_ipo_change = None

			entries.append({
				'company': company,
				'company_url': company_url,
				'symbol': symbol,
				'market': market,
				'price': price,
				'shares': shares,
				'amount': amount,
				'date': date,
				'ipo_data': ipo_data,
				'trade_data': trade_data,
				'first_day_market_change': first_day_change,
				'first_day_market_positive': first_day_positive,
				'first_day_open': first_day_open,
				'first_day_close': first_day_close,
				'price_num': price_num,
				'first_day_ipo_change': first_day_ipo_change
			})

		return entries
Example #36
    def test_get_multi_invalid(self):
        sl = ['AAPL', 'AMZN', 'INVALID']
        data = web.get_data_google(sl, '2012')
        assert 'INVALID' in data.columns.levels[1]
Example #38
    def test_get_multi_all_invalid(self):
        sl = ['INVALID', 'INVALID2', 'INVALID3']
        with pytest.raises(RemoteDataError):
            web.get_data_google(sl, '2012')