Example #1
    def test_get_data(self):
        import numpy as np
        df = web.get_data_google('GOOG')
        print(df.Volume.ix['OCT-08-2010'])
        assert df.Volume.ix['OCT-08-2010'] == 2863473

        sl = ['AAPL', 'AMZN', 'GOOG']
        pan = web.get_data_google(sl, '2012')
        ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG]
        assert ts[0].dayofyear == 96

        pan = web.get_data_google(['GE', 'MSFT', 'INTC'], 'JAN-01-12', 'JAN-31-12')
        expected = [19.02, 28.23, 25.39]
        result = pan.Close.ix['01-18-12'][['GE', 'MSFT', 'INTC']].tolist()
        assert result == expected

        # sanity checking
        t = np.array(result)
        assert np.issubdtype(t.dtype, np.floating)
        assert t.shape == (3,)

        expected = [[ 18.99,  28.4 ,  25.18],
                    [ 18.58,  28.31,  25.13],
                    [ 19.03,  28.16,  25.52],
                    [ 18.81,  28.82,  25.87]]
        result = pan.Open.ix['Jan-15-12':'Jan-20-12'][['GE', 'MSFT', 'INTC']].values
        assert (result == expected).all()

        # sanity checking
        t = np.array(pan)
        assert np.issubdtype(t.dtype, np.floating)
Example #2
    def handle(self, *args, **options):
        if '-c' in args or '--clear' in args or options.get('clear'):
            self.clear()
            return

        months = []
        if len(args) == 2:
            for arg in args:
                try:
                    dt = datetime.strptime(arg, '%Y-%m-%d')
                except ValueError:
                    print("wrong date format, need YYYY-MM-DD")
                    raise
                else:
                    months.append(dt)
        else:
            months = [self.start_time, (date.today() - timedelta(days=1))]

        all_data = {}
        quantize = Decimal('0.01')
        for ticker in NegotiablePaper.objects.all():
            data_series = web.get_data_google(ticker.name, *months)
            cache_gen = [DataCache(paper=ticker,
                                   date=ind,
                                   price=self.get_price(data_series.Close[ind])) \
                            for ind in data_series.index]
            DataCache.objects.bulk_create(cache_gen)
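The handle() method above accepts either a --clear flag or two YYYY-MM-DD dates, then caches Google close prices for each NegotiablePaper. A minimal sketch of driving such a management command from code; the command name price_cache is a hypothetical stand-in for whatever name the project actually registers:

# Hypothetical invocation of the management command sketched above.
# "price_cache" is an assumed name; substitute the project's real command.
from django.core.management import call_command

# Cache prices for an explicit date range (parsed with '%Y-%m-%d').
call_command('price_cache', '2012-01-01', '2012-01-31')

# Clear previously cached data instead of fetching.
call_command('price_cache', clear=True)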
Example #3
 def test_dtypes(self):
     #GH3995
     data = web.get_data_google('MSFT', 'JAN-01-12', 'JAN-31-12')
     assert np.issubdtype(data.Open.dtype, np.number)
     assert np.issubdtype(data.Close.dtype, np.number)
     assert np.issubdtype(data.Low.dtype, np.number)
     assert np.issubdtype(data.High.dtype, np.number)
     assert np.issubdtype(data.Volume.dtype, np.number)
Example #4
 def test_dtypes(self):
     #GH3995, #GH8980
     data = web.get_data_google('F', start='JAN-01-10', end='JAN-27-13')
     self.assertTrue(np.issubdtype(data.Open.dtype, np.number))
     self.assertTrue(np.issubdtype(data.Close.dtype, np.number))
     self.assertTrue(np.issubdtype(data.Low.dtype, np.number))
     self.assertTrue(np.issubdtype(data.High.dtype, np.number))
     self.assertTrue(np.issubdtype(data.Volume.dtype, np.number))
Example #5
 def test_dtypes(self):
     #GH3995, #GH8980
     data = web.get_data_google('F', start='JAN-01-10', end='JAN-27-13')
     self.assertTrue(np.issubdtype(data.Open.dtype, np.number))
     self.assertTrue(np.issubdtype(data.Close.dtype, np.number))
     self.assertTrue(np.issubdtype(data.Low.dtype, np.number))
     self.assertTrue(np.issubdtype(data.High.dtype, np.number))
     self.assertTrue(np.issubdtype(data.Volume.dtype, np.number))
Example #6
 def test_dtypes(self):
     #GH3995
     data = web.get_data_google('MSFT', 'JAN-01-12', 'JAN-31-12')
     assert np.issubdtype(data.Open.dtype, np.number)
     assert np.issubdtype(data.Close.dtype, np.number)
     assert np.issubdtype(data.Low.dtype, np.number)
     assert np.issubdtype(data.High.dtype, np.number)
     assert np.issubdtype(data.Volume.dtype, np.number)
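Examples #3 through #6 only assert that each OHLCV column returned by get_data_google has a numeric dtype. The same check can be exercised offline; a minimal sketch using a locally built DataFrame as a stand-in for the web result:

import numpy as np
import pandas as pd

# Stand-in for the frame returned by web.get_data_google(): one row per
# trading day with numeric OHLCV columns.
data = pd.DataFrame({
    'Open':   [28.40, 28.31],
    'High':   [28.65, 28.50],
    'Low':    [28.10, 28.20],
    'Close':  [28.23, 28.45],
    'Volume': [64731500, 80516100],
})

for col in ['Open', 'High', 'Low', 'Close', 'Volume']:
    assert np.issubdtype(data[col].dtype, np.number)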
Example #7
def get_time_series(comm_id):
    today=datetime.date.today()
    lastyear=today-datetime.timedelta(364)
    f = web.get_data_google(comm_id,lastyear, today)
    f.index=f.index.strftime('%Y-%m-%d')
    x=f.Close.quantile([.1,.25,.5,.75,.9]).to_json()
    ts = f.Close.to_json()
    finalObj = { 'ts': ts, 'quantiles': x }
    return (finalObj)
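get_time_series() returns a year of Close prices and their quantiles as two JSON strings inside a dict. A small usage sketch, assuming web refers to pandas.io.data (or a compatible pandas_datareader import) and the quote source responds:

import json

result = get_time_series('GOOG')
closes = json.loads(result['ts'])            # {'YYYY-MM-DD': close, ...}
quantiles = json.loads(result['quantiles'])  # {'0.1': ..., '0.25': ..., ...}
print(len(closes), quantiles)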
Example #8
 def test_get_multi1(self):
     for locale in self.locales:
         sl = ['AAPL', 'AMZN', 'GOOG']
         with tm.set_locale(locale):
             pan = web.get_data_google(sl, '2012')
         ts = pan.Close.GOOG.index[pan.Close.AAPL < pan.Close.GOOG]
         if (hasattr(pan, 'Close') and hasattr(pan.Close, 'GOOG') and
             hasattr(pan.Close, 'AAPL')):
             self.assertEqual(ts[0].dayofyear, 3)
         else:
             self.assertRaises(AttributeError, lambda: pan.Close)
Example #9
 def test_get_multi1(self):
     for locale in self.locales:
         sl = ['AAPL', 'AMZN', 'GOOG']
         with tm.set_locale(locale):
             pan = web.get_data_google(sl, '2012', '2013')
         ts = pan.Close.GOOG.index[pan.Close.AAPL < pan.Close.GOOG]
         if (hasattr(pan, 'Close') and hasattr(pan.Close, 'GOOG')
                 and hasattr(pan.Close, 'AAPL')):
             self.assertEqual(ts[0].dayofyear, 3)
         else:
             self.assertRaises(AttributeError, lambda: pan.Close)
Example #10
    def test_get_multi1(self):
        sl = ["AAPL", "AMZN", "GOOG"]
        pan = web.get_data_google(sl, "2012")

        def testit():
            ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG]
            self.assertEquals(ts[0].dayofyear, 96)

        if hasattr(pan, "Close") and hasattr(pan.Close, "GOOG") and hasattr(pan.Close, "AAPL"):
            testit()
        else:
            self.assertRaises(AttributeError, testit)
Example #11
    def test_get_multi2(self):
        with warnings.catch_warnings(record=True) as w:
            pan = web.get_data_google(["GE", "MSFT", "INTC"], "JAN-01-12", "JAN-31-12")
            result = pan.Close.ix["01-18-12"]
            assert_n_failed_equals_n_null_columns(w, result)

            # sanity checking

            assert np.issubdtype(result.dtype, np.floating)
            result = pan.Open.ix["Jan-15-12":"Jan-20-12"]
            self.assertEqual((4, 3), result.shape)
            assert_n_failed_equals_n_null_columns(w, result)
Example #12
    def test_get_multi1(self):
        sl = ['AAPL', 'AMZN', 'GOOG']
        pan = web.get_data_google(sl, '2012')

        def testit():
            ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG]
            self.assertEquals(ts[0].dayofyear, 96)

        if (hasattr(pan, 'Close') and hasattr(pan.Close, 'GOOG') and
            hasattr(pan.Close, 'AAPL')):
            testit()
        else:
            self.assertRaises(AttributeError, testit)
Example #13
    def test_get_multi2(self):
        with warnings.catch_warnings(record=True) as w:
            pan = web.get_data_google(['GE', 'MSFT', 'INTC'], 'JAN-01-12',
                                      'JAN-31-12')
            result = pan.Close.ix['01-18-12']
            assert_n_failed_equals_n_null_columns(w, result)

            # sanity checking

            assert np.issubdtype(result.dtype, np.floating)
            result = pan.Open.ix['Jan-15-12':'Jan-20-12']
            self.assertEqual((4, 3), result.shape)
            assert_n_failed_equals_n_null_columns(w, result)
Example #14
    def test_get_multi1(self):
        sl = ['AAPL', 'AMZN', 'GOOG']
        pan = web.get_data_google(sl, '2012')

        def testit():
            ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG]
            self.assertEquals(ts[0].dayofyear, 96)

        if (hasattr(pan, 'Close') and hasattr(pan.Close, 'GOOG')
                and hasattr(pan.Close, 'AAPL')):
            testit()
        else:
            self.assertRaises(AttributeError, testit)
Example #15
    def test_get_multi2(self):
        with warnings.catch_warnings(record=True) as w:
            pan = web.get_data_google(['GE', 'MSFT', 'INTC'], 'JAN-01-12',
                                      'JAN-31-12')
            result = pan.Close.ix['01-18-12']
            assert_n_failed_equals_n_null_columns(w, result)

            # sanity checking

            assert np.issubdtype(result.dtype, np.floating)
            result = pan.Open.ix['Jan-15-12':'Jan-20-12']
            self.assertEqual((4, 3), result.shape)
            assert_n_failed_equals_n_null_columns(w, result)
Example #16
    def test_get_multi2(self):
        pan = web.get_data_google(['GE', 'MSFT', 'INTC'], 'JAN-01-12',
                                  'JAN-31-12')
        result = pan.Close.ix['01-18-12']
        self.assertEqual(len(result), 3)

        # sanity checking
        assert np.issubdtype(result.dtype, np.floating)

        expected = np.array([[ 18.99,  28.4 ,  25.18],
                             [ 18.58,  28.31,  25.13],
                             [ 19.03,  28.16,  25.52],
                             [ 18.81,  28.82,  25.87]])
        result = pan.Open.ix['Jan-15-12':'Jan-20-12']
        self.assertEqual(np.array(expected).shape, result.shape)
Example #17
    def test_get_multi2(self):
        pan = web.get_data_google(['GE', 'MSFT', 'INTC'], 'JAN-01-12', 'JAN-31-12')
        expected = [19.02, 28.23, 25.39]
        result = pan.Close.ix['01-18-12'][['GE', 'MSFT', 'INTC']].tolist()
        assert result == expected

        # sanity checking
        t = np.array(result)
        assert np.issubdtype(t.dtype, np.floating)
        assert t.shape == (3,)

        expected = [[ 18.99,  28.4 ,  25.18],
                    [ 18.58,  28.31,  25.13],
                    [ 19.03,  28.16,  25.52],
                    [ 18.81,  28.82,  25.87]]
        result = pan.Open.ix['Jan-15-12':'Jan-20-12'][['GE', 'MSFT', 'INTC']].values
        assert (result == expected).all()

        # sanity checking
        t = np.array(pan)
        assert np.issubdtype(t.dtype, np.floating)
Example #18
 def test_get_multi_invalid(self):
     sl = ['AAPL', 'AMZN', 'INVALID']
     with tm.assert_produces_warning(SymbolWarning):
         pan = web.get_data_google(sl, '2012')
         self.assertIn('INVALID', pan.minor_axis)
Example #19
 def test_get_goog_volume(self):
     for locale in self.locales:
         with tm.set_locale(locale):
             df = web.get_data_google('GOOG').sort_index()
         self.assertEqual(df.Volume.ix['OCT-08-2010'], 2863473)
Example #20
def downloadQuotes(tickers,
                   date1=None,
                   date2=None,
                   adjust=True,
                   Verbose=False):
    """
    Given a ticker sequence, return historical Yahoo! quotes as a pandas DataFrame.

    Parameters
    ----------
    tickers : sequence
        A sequence (such as a list) of string tickers. For example:
        ['aapl', 'msft']
    date1 : {datetime.date, tuple}, optional
        The first date to grab historical quotes on. For example:
        datetime.date(2010, 1, 1) or (2010, 1, 1). By default the first
        date is (1900, 1, 1).
    date2 : {datetime.date, tuple}, optional
        The last date to grab historical quotes on. For example:
        datetime.date(2010, 12, 31) or (2010, 12, 31). By default the last
        date is 10 days beyond today's date.
    adjust : bool, optional
        Adjust (default) the open, close, high, and low prices. The
        adjustment takes splits and dividends into account such that the
        corresponding returns are correct. Volume is already split adjusted
        by Yahoo so it is not changed by the value of `adjust`.
    Verbose : bool, optional
        Print the ticker currently being loaded. By default the tickers are
        not printed.

    Returns
    -------
    quotes_df : DataFrame
        A pandas dataframe is returned. In order, the  axes contain: dates,
        quotes (adjusted close). The elements along the item axis depend on the value
        of `adjust`. When `adjust` is False, the items are

        ['open', 'close', 'high', 'low', 'volume', 'adjclose']

        When adjust is true (default), the adjusted close ('adjclose') is
        not included. The dates are datetime.date objects.

    Examples
    --------
    items = ['Adj Close']
    date1 = '2012-01-01'
    date2 = datetime.date.today()
    ticker = 'GOOGL'
    data = get_data_yahoo(ticker, start = date1, end = date2)[items]
    dates = data.index
    data.columns = [ticker]

    ticker = 'AMZN'
    data2 = get_data_yahoo(ticker, start = date1, end = date2)[items]
    dates2 = data2.index
    data2.columns = [ticker]

    data = data.join(data2, how='outer')
    data.sort_index( axis=0, inplace=True )

    data.tail()

                 GOOGL    AMZN
    Date
    2014-04-07  540.63  317.76
    2014-04-08  557.51  327.07
    2014-04-09  567.04  331.81
    2014-04-10  546.69  317.11
    2014-04-11  537.76  311.73


    """

    from time import sleep
    from matplotlib.finance import *
    #from la.external.matplotlib import quotes_historical_yahoo
    import pandas as pd
    from pandas.io.data import DataReader
    from pandas.io.data import get_data_yahoo, get_data_google
    #import la

    if date1 is None:
        date1 = datetime.date(1900, 1, 1)
    if date2 is None:
        date2 = datetime.date.today() + datetime.timedelta(+10)
    #quotes_df = None
    #lar = None
    items = ['Adj Close']
    google_items = ['Close']
    if Verbose:
        print "Load data"

    i = 0
    number_tries = 0
    re_tries = 0
    for itick, ticker in enumerate(tickers):
        if Verbose:
            print "\t" + ticker + "  ",

        data = []
        dates = []

        #number_tries = 0
        try:
            # read in dataframe containing adjusted close quotes for a ticker in the list
            #print "number_tries = ", number_tries
            if number_tries < 11:
                #print "number_tries = ", number_tries, " trying with yahoo"
                try:
                    data = get_data_yahoo(ticker, start=date1,
                                          end=date2)[items]
                    number_tries = 0
                except:
                    pass
            else:
                #print "number_tries = ", number_tries, " trying with google"
                print "   ...retrieving quotes using google"
                try:
                    data = get_data_google(ticker, start=date1,
                                           end=date2)[google_items]
                    number_tries = 0
                except:
                    pass
            #print ' data = ', data
            dates = data.index
            #print ' dates = ', dates
            dates = [d.to_datetime() for d in dates]

            data.columns = [ticker]
            #print ' ticker = ', [ticker]
            #print ' data.columns = ', data.columns
            if Verbose:
                print i, " of ", len(
                    tickers
                ), " ticker ", ticker, " has ", data.shape[0], " quotes"

            if itick - re_tries == 0:
                #print " creating dataframe..."
                quotes_df = data
            else:
                #print " joining to dataframe..."
                quotes_df = quotes_df.join(data, how='outer')
                #print " joined to dataframe..."
            i += 1
        except:
            print "could not get quotes for ", ticker, "         will try again and again.", number_tries
            sleep(3)
            number_tries += 1
            re_tries += 1
            if number_tries < 20:
                tickers[itick + 1:itick + 1] = [ticker]

    print "number of tickers successfully processed = ", i
    if i > 0:
        quotes_df.sort_index(axis=0, inplace=True)
        return quotes_df

    else:
        # return empty DataFrame
        quotes_df = pd.DataFrame([0, 0], ['Dates', date2])
        quotes_df.columns = ['None']

    return quotes_df
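A short usage sketch for downloadQuotes(), based only on the signature and docstring above (the function itself targets the Python 2 era pandas.io.data stack):

import datetime

# Adjusted closes for two tickers over 2014 Q1, with progress printing.
quotes = downloadQuotes(['AAPL', 'MSFT'],
                        date1=datetime.date(2014, 1, 1),
                        date2=datetime.date(2014, 3, 31),
                        Verbose=True)
print(quotes.tail())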
Example #21
def get_data(tickerList, fromDate, toDate):
    data = web.get_data_google(tickerList, fromDate, toDate, ret_index=True)
    return data
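A usage sketch for the thin get_data() wrapper above, assuming web refers to pandas.io.data (or pandas_datareader.data) imported elsewhere in the module; ret_index=True is simply forwarded to the reader:

import datetime

start = datetime.datetime(2015, 1, 1)
end = datetime.datetime(2015, 1, 31)

# One month of quotes for two tickers via the wrapper.
data = get_data(['AAPL', 'MSFT'], start, end)
print(data)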
Example #22
def data_web_import_view(request, symbol=''):
    """
    Select symbol for web get google data
    :param symbol: str
    :param request: request
    :return: render
    """
    # noinspection PyShadowingNames
    def create_stock(symbol, index, data, source):
        """
        Create a stock object
        :param symbol: str
        :param index: datetime
        :param data: dict
        :param source: str
        :return: Stock
        """
        return Stock(
            symbol=symbol,
            date=index.strftime('%Y-%m-%d'),
            open=data['Open'],
            high=data['High'],
            low=data['Low'],
            close=data['Close'],
            volume=data['Volume'],
            source=source
        )

    template = 'data/run_web.html'

    if symbol:
        symbol = symbol.upper()
        stocks = Stock.objects.filter(symbol=symbol)

        if stocks.exists():
            tos_thinkback = stocks.filter(source='tos_thinkback').order_by('date')

            if tos_thinkback.exists() and tos_thinkback.count() > 1:
                tb_first_date = tos_thinkback.first().date
                tb_last_date = tos_thinkback.last().date
            else:
                raise LookupError(
                    '< {symbol} > Not enough stock data from source tos_thinkback.'.format(
                        symbol=symbol
                    )
                )

            web_stocks = stocks.filter(Q(source='google') | Q(source='yahoo')).order_by('date')
            new_stocks = list()

            if web_stocks.exists():
                # generate a list of date then web get
                # tos_thinkback_dates = [stock.date for stock in tos_thinkback]

                #print tos_thinkback_dates
                # generate a list of bday using tb date, remove date from list using google date
                google_dates = [stock.date.strftime('%Y-%m-%d') for stock
                                in web_stocks.filter(source='google')]

                yahoo_dates = [stock.date.strftime('%Y-%m-%d') for stock
                               in web_stocks.filter(source='yahoo')]

                google_data = get_data_google(
                    symbols=symbol,
                    #start='2015-04-01', end='2015-04-10',  # test only
                    start=tb_first_date, end=tb_last_date,
                    adjust_price=True
                )

                for index, data in google_data.iterrows():
                    if index.strftime('%Y-%m-%d') not in google_dates:
                        # not found for google, insert db
                        if int(data['Volume']) > 0:
                            stock = create_stock(symbol, index, data, 'google')
                            new_stocks.append(stock)

                yahoo_data = get_data_yahoo(
                    symbols=symbol,
                    # start='2015-04-01', end='2015-04-10',  # test only
                    start=tb_first_date, end=tb_last_date,
                    adjust_price=True
                )

                for index, data in yahoo_data.iterrows():
                    if index.strftime('%Y-%m-%d') not in yahoo_dates:
                        # not found for yahoo, insert db
                        if int(data['Volume']) > 0:
                            stock = create_stock(symbol, index, data, 'yahoo')
                            new_stocks.append(stock)

            else:
                # import all data, google
                google_data = get_data_google(
                    symbols=symbol,
                    # start='2015-04-01', end='2015-04-10',  # test only
                    start=tb_first_date, end=tb_last_date,
                    adjust_price=True
                )

                for index, data in google_data.iterrows():
                    if int(data['Volume']) > 0:
                        stock = create_stock(symbol, index, data, 'google')
                        new_stocks.append(stock)

                    #print index, data['Open'], data['High'], data['Low'], data['Close'], data['Volume']

                # import all data, yahoo
                yahoo_data = get_data_yahoo(
                    symbols=symbol,
                    # start='2015-04-01', end='2015-04-10',  # test only
                    start=tb_first_date, end=tb_last_date,
                    adjust_price=True
                )

                for index, data in yahoo_data.iterrows():
                    if int(data['Volume']) > 0:
                        stock = create_stock(symbol, index, data, 'yahoo')
                        new_stocks.append(stock)

            # bulk insert
            if len(new_stocks):
                Stock.objects.bulk_create(new_stocks)

        else:
            raise ObjectDoesNotExist(
                "No stock data on < {symbol} >, run tos_thinkback before get google".format(
                    symbol=symbol
                )
            )
    else:
        raise ValueError('Symbol is blank.')

    # reset
    # Stock.objects.filter(symbol=symbol).filter(source='google').delete()

    parameters = dict(
        symbol=symbol,
        stocks=new_stocks
    )

    return render(request, template, parameters)
Example #23
 def test_get_goog_volume(self):
     df = web.get_data_google('GOOG')
     self.assertEqual(df.Volume.ix['OCT-08-2010'], 2863473)
Example #24
 def test_get_goog_volume(self):
     for locale in self.locales:
         with tm.set_locale(locale):
             df = web.get_data_google('GOOG').sort_index()
         self.assertEqual(df.Volume.ix['JAN-02-2015'], 1446662)
Example #25
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 15 17:31:39 2017

Examples from the Book Python for Data Analysis, on Data Frames

@author: xuehuachen
"""

from pandas import DataFrame
import pandas as pd
import pandas.io.data as web

all_data = {}
for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']:
    all_data[ticker] = web.get_data_google(ticker)

price = DataFrame({tic: data['Close'] for tic, data in all_data.iteritems()})
volume = DataFrame({tic: data['Volume'] for tic, data in all_data.iteritems()})

returns = price.pct_change()

# Correlation and Covariance
returns.MSFT.corr(returns.IBM)
returns.MSFT.cov(returns.IBM)
returns.corr()
returns.cov()

returns.corrwith(returns.IBM)
returns.corrwith(volume)
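The correlation and covariance calls above can be exercised without a working Google Finance endpoint by substituting synthetic prices for the downloaded ones; a minimal sketch:

import numpy as np
import pandas as pd

# Synthetic daily closes for the same four tickers, as a stand-in for
# the frames fetched with web.get_data_google().
rng = np.random.default_rng(0)
dates = pd.date_range('2017-01-02', periods=250, freq='B')
price = pd.DataFrame(
    100 * np.exp(np.cumsum(rng.normal(0, 0.01, size=(250, 4)), axis=0)),
    index=dates, columns=['AAPL', 'IBM', 'MSFT', 'GOOG'])

returns = price.pct_change()
print(returns['MSFT'].corr(returns['IBM']))  # pairwise correlation
print(returns.corr())                        # full correlation matrix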
Example #26
 def test_get_goog_volume(self):
     df = web.get_data_google('GOOG')
     self.assertEqual(df.Volume.ix['OCT-08-2010'], 2863473)
Example #27
import pandas.io.data as web
import datetime
import pandas
import simplejson

start = datetime.datetime(2015, 1, 20)

end = datetime.datetime(2015, 1, 27)

datetime.date.today()-datetime.timedelta(364)

f = web.get_data_google("cow",start, end)

g = web.Options('cow', 'yahoo')


def get_time_series(comm_id):
    today=datetime.date.today()
    lastyear=today-datetime.timedelta(364)
    f = web.get_data_google(comm_id,lastyear, today)
    f.index=f.index.strftime('%Y-%m-%d')
    x=f.Close.quantile([.1,.25,.5,.75,.9]).to_json()
    ts = f.Close.to_json()
    finalObj = { 'ts': ts, 'quantiles': x }
    return (finalObj)

def intuit_to_commid(intuitid):
    if intuitid == "beef":
        return "cow"
    if intuitid == "corn":
        return intuitid
Example #28
 def test_get_multi_invalid(self):
     sl = ['AAPL', 'AMZN', 'INVALID']
     pan = web.get_data_google(sl, '2012')
     self.assertIn('INVALID', pan.minor_axis)
Example #29
 def test_get_goog_volume(self):
     for locale in self.locales:
         with tm.set_locale(locale):
             df = web.get_data_google('GOOG').sort_index()
         self.assertEqual(df.Volume.ix['OCT-08-2010'], 2863473)
Example #30
 def test_get_multi_invalid(self):
     sl = ['AAPL', 'AMZN', 'INVALID']
     with tm.assert_produces_warning(SymbolWarning):
         pan = web.get_data_google(sl, '2012')
         self.assertIn('INVALID', pan.minor_axis)
Example #31
 def test_get_goog_volume(self):
     df = web.get_data_google('GOOG')
     assert df.Volume.ix['OCT-08-2010'] == 2863473
Example #32
 def test_unicode_date(self):
     #GH8967
     data = web.get_data_google('F', start='JAN-01-10', end='JAN-27-13')
     self.assertEqual(data.index.name, 'Date')
Example #33
 def test_get_goog_volume(self):
     for locale in self.locales:
         with tm.set_locale(locale):
             df = web.get_data_google('GOOG').sort_index()
         self.assertEqual(df.Volume.ix['JAN-02-2015'], 1446662)
Example #34
import numpy as np
import pandas as pd
from pandas import DataFrame, Series

# Hierarchical-index example (constructor reconstructed from context):
frame = DataFrame(np.arange(12).reshape((4, 3)),
                  index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
                  columns=[['Ohio', 'Ohio', 'Colorado'],
                           ['Green', 'Red', 'Green']])
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']
frame.sum(level="color", axis=1)
frame.sum(level="state", axis=1)

ser = Series(np.arange(3.), index=list("abc"))
ser
ser[-1]
ser.iget_value(2)

import pandas.io.data as web
pdata = pd.Panel(
    dict((stk, web.get_data_google(stk, "1/1/2012", "12/30/2014"))
         for stk in ["AAPL", "GOOG", "MSFT", "DELL"]))
pdata
pdata = pdata.swapaxes("items", "minor")
pdata
pdata.ix[:, "12/3/2012", :]

import statsmodels.api as sm
import numpy as np
import matplotlib.pyplot as plt
nsample = 100
x = np.linspace(0, 10, 100)
x
X = np.column_stack((x, x**2))
X
X = sm.add_constant(X)
Example #35
def data_web_import_view(request, symbol=''):
    """
    Select symbol for web get google data
    :param symbol: str
    :param request: request
    :return: render
    """

    # noinspection PyShadowingNames
    def create_stock(symbol, index, data, source):
        """
        Create a stock object
        :param symbol: str
        :param index: datetime
        :param data: dict
        :param source: str
        :return: Stock
        """
        return Stock(symbol=symbol,
                     date=index.strftime('%Y-%m-%d'),
                     open=data['Open'],
                     high=data['High'],
                     low=data['Low'],
                     close=data['Close'],
                     volume=data['Volume'],
                     source=source)

    template = 'data/run_web.html'

    if symbol:
        symbol = symbol.upper()
        stocks = Stock.objects.filter(symbol=symbol)

        if stocks.exists():
            tos_thinkback = stocks.filter(
                source='tos_thinkback').order_by('date')

            if tos_thinkback.exists() and tos_thinkback.count() > 1:
                tb_first_date = tos_thinkback.first().date
                tb_last_date = tos_thinkback.last().date
            else:
                raise LookupError(
                    '< {symbol} > Not enough stock data from source tos_thinkback.'
                    .format(symbol=symbol))

            web_stocks = stocks.filter(Q(source='google')
                                       | Q(source='yahoo')).order_by('date')
            new_stocks = list()

            if web_stocks.exists():
                # generate a list of date then web get
                # tos_thinkback_dates = [stock.date for stock in tos_thinkback]

                #print tos_thinkback_dates
                # generate a list of bday using tb date, remove date from list using google date
                google_dates = [
                    stock.date.strftime('%Y-%m-%d')
                    for stock in web_stocks.filter(source='google')
                ]

                yahoo_dates = [
                    stock.date.strftime('%Y-%m-%d')
                    for stock in web_stocks.filter(source='yahoo')
                ]

                google_data = get_data_google(
                    symbols=symbol,
                    #start='2015-04-01', end='2015-04-10',  # test only
                    start=tb_first_date,
                    end=tb_last_date,
                    adjust_price=True)

                for index, data in google_data.iterrows():
                    if index.strftime('%Y-%m-%d') not in google_dates:
                        # not found for google, insert db
                        if int(data['Volume']) > 0:
                            stock = create_stock(symbol, index, data, 'google')
                            new_stocks.append(stock)

                yahoo_data = get_data_yahoo(
                    symbols=symbol,
                    # start='2015-04-01', end='2015-04-10',  # test only
                    start=tb_first_date,
                    end=tb_last_date,
                    adjust_price=True)

                for index, data in yahoo_data.iterrows():
                    if index.strftime('%Y-%m-%d') not in yahoo_dates:
                        # not found for yahoo, insert db
                        if int(data['Volume']) > 0:
                            stock = create_stock(symbol, index, data, 'yahoo')
                            new_stocks.append(stock)

            else:
                # import all data, google
                google_data = get_data_google(
                    symbols=symbol,
                    # start='2015-04-01', end='2015-04-10',  # test only
                    start=tb_first_date,
                    end=tb_last_date,
                    adjust_price=True)

                for index, data in google_data.iterrows():
                    if int(data['Volume']) > 0:
                        stock = create_stock(symbol, index, data, 'google')
                        new_stocks.append(stock)

                    #print index, data['Open'], data['High'], data['Low'], data['Close'], data['Volume']

                # import all data, yahoo
                yahoo_data = get_data_yahoo(
                    symbols=symbol,
                    # start='2015-04-01', end='2015-04-10',  # test only
                    start=tb_first_date,
                    end=tb_last_date,
                    adjust_price=True)

                for index, data in yahoo_data.iterrows():
                    if int(data['Volume']) > 0:
                        stock = create_stock(symbol, index, data, 'yahoo')
                        new_stocks.append(stock)

            # bulk insert
            if len(new_stocks):
                Stock.objects.bulk_create(new_stocks)

        else:
            raise ObjectDoesNotExist(
                "No stock data on < {symbol} >, run tos_thinkback before get google"
                .format(symbol=symbol))
    else:
        raise ValueError('Symbol is blank.')

    # reset
    # Stock.objects.filter(symbol=symbol).filter(source='google').delete()

    parameters = dict(symbol=symbol, stocks=new_stocks)

    return render(request, template, parameters)
Example #36
def data_daily_import_view(request):
    """
    Import all csv files in daily folder
    then insert web data for that date
    :param request: request
    :return: render
    """
    template = 'data/daily.html'

    insert_files = list()

    files = [
        path for path in
        glob(os.path.join(THINKBACK_DIR, '_daily', '*.csv'))
    ]

    for f in files:
        contracts = 0
        options = 0

        # get filename and dir
        filename = os.path.basename(f)
        print 'running file: %s...' % filename
        date, symbol = map(
            lambda x: x.upper(),
            filename[:-4].split('-StockAndOptionQuoteFor')
        )

        # file into dict
        stock_data, option_data = OpenThinkBack(date=date, data=open(f).read()).format()

        # save stock
        stock = Stock()
        stock.symbol = symbol
        stock.source = 'tos_thinkback'
        stock.data = stock_data
        stock.save()

        # save contract and option
        for contract_dict, option_dict in option_data:
            try:
                contract = OptionContract.objects.get(option_code=contract_dict['option_code'])
            except ObjectDoesNotExist:
                contract = OptionContract()
                contract.symbol = symbol
                contract.source = 'tos_thinkback'
                contract.data = contract_dict
                contract.save()
                contracts += 1

            option = Option()
            option.option_contract = contract
            option.data = option_dict
            option.save()
            options += 1

        # move file into folder
        year = filename[:4]
        year_dir = os.path.join(THINKBACK_DIR, symbol, year)

        # make dir if not exists
        if not os.path.isdir(year_dir):
            os.mkdir(year_dir)

        os.rename(f, os.path.join(year_dir, os.path.basename(f)))

        # save data from web
        google_data = get_data_google(
            symbols=symbol,
            # start='2015-04-01', end='2015-04-10',  # test only
            start=date, end=date,
            adjust_price=True
        )

        yahoo_data = get_data_yahoo(
            symbols=symbol,
            # start='2015-04-01', end='2015-04-10',  # test only
            start=date, end=date,
            adjust_price=True
        )

        for index, data in google_data.iterrows():
            if int(data['Volume']) > 0:
                google_stock = Stock(
                    symbol=symbol,
                    date=index.strftime('%Y-%m-%d'),
                    open=data['Open'],
                    high=data['High'],
                    low=data['Low'],
                    close=data['Close'],
                    volume=data['Volume'],
                    source='google'
                )
                google_stock.save()

        for index, data in yahoo_data.iterrows():
            if int(data['Volume']) > 0:
                yahoo_stock = Stock(
                    symbol=symbol,
                    date=index.strftime('%Y-%m-%d'),
                    open=data['Open'],
                    high=data['High'],
                    low=data['Low'],
                    close=data['Close'],
                    volume=data['Volume'],
                    source='yahoo'
                )
                yahoo_stock.save()

        insert_files.append(
            dict(
                symbol=symbol,
                date=date,
                path=filename,
                stock=1,
                contracts=contracts,
                options=options
            )
        )

    parameters = dict(
        insert_files=insert_files
    )

    return render(request, template, parameters)
Example #37
def data_daily_import_view(request):
    """
    Import all csv files in daily folder
    then insert web data for that date
    :param request: request
    :return: render
    """
    template = 'data/daily.html'

    insert_files = list()

    files = [
        path for path in glob(os.path.join(THINKBACK_DIR, '_daily', '*.csv'))
    ]

    for f in files:
        contracts = 0
        options = 0

        # get filename and dir
        filename = os.path.basename(f)
        print 'running file: %s...' % filename
        date, symbol = map(lambda x: x.upper(),
                           filename[:-4].split('-StockAndOptionQuoteFor'))

        # file into dict
        stock_data, option_data = OpenThinkBack(date=date,
                                                data=open(f).read()).format()

        # save stock
        stock = Stock()
        stock.symbol = symbol
        stock.source = 'tos_thinkback'
        stock.data = stock_data
        stock.save()

        # save contract and option
        for contract_dict, option_dict in option_data:
            try:
                contract = OptionContract.objects.get(
                    option_code=contract_dict['option_code'])
            except ObjectDoesNotExist:
                contract = OptionContract()
                contract.symbol = symbol
                contract.source = 'tos_thinkback'
                contract.data = contract_dict
                contract.save()
                contracts += 1

            option = Option()
            option.option_contract = contract
            option.data = option_dict
            option.save()
            options += 1

        # move file into folder
        year = filename[:4]
        year_dir = os.path.join(THINKBACK_DIR, symbol, year)

        # make dir if not exists
        if not os.path.isdir(year_dir):
            os.mkdir(year_dir)

        os.rename(f, os.path.join(year_dir, os.path.basename(f)))

        # save data from web
        google_data = get_data_google(
            symbols=symbol,
            # start='2015-04-01', end='2015-04-10',  # test only
            start=date,
            end=date,
            adjust_price=True)

        yahoo_data = get_data_yahoo(
            symbols=symbol,
            # start='2015-04-01', end='2015-04-10',  # test only
            start=date,
            end=date,
            adjust_price=True)

        for index, data in google_data.iterrows():
            if int(data['Volume']) > 0:
                google_stock = Stock(symbol=symbol,
                                     date=index.strftime('%Y-%m-%d'),
                                     open=data['Open'],
                                     high=data['High'],
                                     low=data['Low'],
                                     close=data['Close'],
                                     volume=data['Volume'],
                                     source='google')
                google_stock.save()

        for index, data in yahoo_data.iterrows():
            if int(data['Volume']) > 0:
                yahoo_stock = Stock(symbol=symbol,
                                    date=index.strftime('%Y-%m-%d'),
                                    open=data['Open'],
                                    high=data['High'],
                                    low=data['Low'],
                                    close=data['Close'],
                                    volume=data['Volume'],
                                    source='yahoo')
                yahoo_stock.save()

        insert_files.append(
            dict(symbol=symbol,
                 date=date,
                 path=filename,
                 stock=1,
                 contracts=contracts,
                 options=options))

    parameters = dict(insert_files=insert_files)

    return render(request, template, parameters)
Example #38
 def test_get_multi_invalid(self):
     sl = ['AAPL', 'AMZN', 'INVALID']
     pan = web.get_data_google(sl, '2012')
     self.assertIn('INVALID', pan.minor_axis)
Example #39
 def test_get_multi1(self):
     sl = ['AAPL', 'AMZN', 'GOOG']
     pan = web.get_data_google(sl, '2012')
     ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG]
     assert ts[0].dayofyear == 96
Example #40
 def test_unicode_date(self):
     #GH8967
     data = web.get_data_google('F', start='JAN-01-10', end='JAN-27-13')
     self.assertEqual(data.index.name, 'Date')
Example #41
def downloadQuotes(tickers, date1=None, date2=None, adjust=True, Verbose=False):
    """
    Given a ticker sequence, return historical Yahoo! quotes as a pandas DataFrame.

    Parameters
    ----------
    tickers : sequence
        A sequence (such as a list) of string tickers. For example:
        ['aapl', 'msft']
    date1 : {datetime.date, tuple}, optional
        The first date to grab historical quotes on. For example:
        datetime.date(2010, 1, 1) or (2010, 1, 1). By default the first
        date is (1900, 1, 1).
    date2 : {datetime.date, tuple}, optional
        The last date to grab historical quotes on. For example:
        datetime.date(2010, 12, 31) or (2010, 12, 31). By default the last
        date is 10 days beyond today's date.
    adjust : bool, optional
        Adjust (default) the open, close, high, and low prices. The
        adjustment takes splits and dividends into account such that the
        corresponding returns are correct. Volume is already split adjusted
        by Yahoo so it is not changed by the value of `adjust`.
    Verbose : bool, optional
        Print the ticker currently being loaded. By default the tickers are
        not printed.

    Returns
    -------
    quotes_df : DataFrame
        A pandas dataframe is returned. In order, the  axes contain: dates,
        quotes (adjusted close). The elements along the item axis depend on the value
        of `adjust`. When `adjust` is False, the items are

        ['open', 'close', 'high', 'low', 'volume', 'adjclose']

        When adjust is true (default), the adjusted close ('adjclose') is
        not included. The dates are datetime.date objects.

    Examples
    --------
    items = ['Adj Close']
    date1 = '2012-01-01'
    date2 = datetime.date.today()
    ticker = 'GOOGL'
    data = get_data_yahoo(ticker, start = date1, end = date2)[items]
    dates = data.index
    data.columns = [ticker]

    ticker = 'AMZN'
    data2 = get_data_yahoo(ticker, start = date1, end = date2)[items]
    dates2 = data2.index
    data2.columns = [ticker]

    data = data.join(data2, how='outer')
    data.sort_index( axis=0, inplace=True )

    data.tail()

                 GOOGL    AMZN
    Date
    2014-04-07  540.63  317.76
    2014-04-08  557.51  327.07
    2014-04-09  567.04  331.81
    2014-04-10  546.69  317.11
    2014-04-11  537.76  311.73


    """

    from time import sleep
    from matplotlib.finance import *
    #from la.external.matplotlib import quotes_historical_yahoo
    import pandas as pd
    from pandas.io.data import DataReader
    from pandas.io.data import get_data_yahoo, get_data_google
    #import la

    if date1 is None:
        date1 = datetime.date(1900, 1, 1)
    if date2 is None:
        date2 = datetime.date.today() + datetime.timedelta(+10)
    #quotes_df = None
    #lar = None
    items = ['Adj Close']
    google_items = ['Close']
    if Verbose:
        print "Load data"

    i=0
    number_tries = 0
    re_tries = 0
    for itick, ticker in enumerate(tickers):
        if Verbose:
            print "\t" + ticker + "  ",

        data = []
        dates = []

        #number_tries = 0
        try:
            # read in dataframe containing adjusted close quotes for a ticker in the list
            #print "number_tries = ", number_tries
            if number_tries < 11:
                #print "number_tries = ", number_tries, " trying with yahoo"
                try:
                    data = get_data_yahoo(ticker, start = date1, end = date2)[items]
                    number_tries = 0
                except:
                    pass
            else:
                #print "number_tries = ", number_tries, " trying with google"
                print "   ...retrieving quotes using google"
                try:
                    data = get_data_google(ticker, start = date1, end = date2)[google_items]
                    number_tries = 0
                except:
                    pass
            #print ' data = ', data
            dates = data.index
            #print ' dates = ', dates
            dates = [d.to_datetime() for d in dates]
            
            data.columns = [ticker]
            #print ' ticker = ', [ticker]
            #print ' data.columns = ', data.columns
            if Verbose:
                print i," of ",len(tickers)," ticker ",ticker," has ",data.shape[0]," quotes"

            if itick-re_tries == 0:
                #print " creating dataframe..."
                quotes_df = data
            else:
                #print " joining to dataframe..."
                quotes_df = quotes_df.join( data, how='outer' )
                #print " joined to dataframe..."
            i += 1
        except:
            print "could not get quotes for ", ticker, "         will try again and again.", number_tries
            sleep(3)
            number_tries += 1
            re_tries += 1
            if number_tries < 20:
                tickers[itick+1:itick+1] = [ticker]

    print "number of tickers successfully processed = ", i
    if i > 0 :
        quotes_df.sort_index( axis=0, inplace=True )
        return quotes_df

    else :
        # return empty DataFrame
        quotes_df = pd.DataFrame( [0,0], ['Dates',date2])
        quotes_df.columns = ['None']

    return quotes_df
Example #42
import numpy as np
import pandas as pd
from pandas import DataFrame, Series

# Hierarchical-index example (constructor reconstructed from context):
frame = DataFrame(np.arange(12).reshape((4, 3)),
                  index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
                  columns=[['Ohio', 'Ohio', 'Colorado'],
                           ['Green', 'Red', 'Green']])
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']
frame.sum(level="color", axis=1)
frame.sum(level="state", axis=1)

ser = Series(np.arange(3.), index=list("abc"))
ser
ser[-1]
ser.iget_value(2)

import pandas.io.data as web
pdata = pd.Panel(
    dict((stk, web.get_data_google(stk, "1/1/2012", "12/30/2014"))
         for stk in ["AAPL", "GOOG", "MSFT", "DELL"]))
pdata
pdata = pdata.swapaxes("items", "minor")
pdata
pdata.ix[:, "12/3/2012", :]

import statsmodels.api as sm
import numpy as np
import matplotlib.pyplot as plt
nsample = 100
x = np.linspace(0, 10, 100)
x
X = np.column_stack((x, x**2))
X
X = sm.add_constant(X)
Example #43
 def test_get_goog_volume(self):
     df = web.get_data_google("GOOG")
     self.assertEqual(df.Volume.ix["OCT-08-2010"], 2863473)