def test_get_data(self):
    """End-to-end check of single- and multi-symbol google fetches."""
    import numpy as np
    # Single symbol: spot-check a known volume figure.
    # (Fix: removed leftover debug print and the duplicated remote lookup.)
    df = web.get_data_google('GOOG')
    assert df.Volume.ix['OCT-08-2010'] == 2863473

    # Multi symbol: first 2012 session where AAPL closed above GOOG.
    sl = ['AAPL', 'AMZN', 'GOOG']
    pan = web.get_data_google(sl, '2012')
    ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG]
    assert ts[0].dayofyear == 96

    pan = web.get_data_google(['GE', 'MSFT', 'INTC'], 'JAN-01-12', 'JAN-31-12')
    expected = [19.02, 28.23, 25.39]
    result = pan.Close.ix['01-18-12'][['GE', 'MSFT', 'INTC']].tolist()
    assert result == expected

    # sanity checking: one float per ticker
    t = np.array(result)
    assert np.issubdtype(t.dtype, np.floating)
    assert t.shape == (3,)

    expected = [[18.99, 28.4, 25.18],
                [18.58, 28.31, 25.13],
                [19.03, 28.16, 25.52],
                [18.81, 28.82, 25.87]]
    result = pan.Open.ix['Jan-15-12':'Jan-20-12'][['GE', 'MSFT', 'INTC']].values
    assert (result == expected).all()

    # sanity checking
    t = np.array(pan)
    assert np.issubdtype(t.dtype, np.floating)
def handle(self, *args, **options):
    """Management-command entry point.

    With -c/--clear, wipe the cache and return.  Otherwise accept an
    optional pair of YYYY-MM-DD date arguments (defaulting to the
    configured start time through yesterday) and cache a closing price
    for every known paper over that range.
    """
    if '-c' in args or '--clear' in args or options.get('clear'):
        self.clear()
        return

    months = []
    if len(args) == 2:
        for arg in args:
            try:
                dt = datetime.strptime(arg, '%Y-%m-%d')
            # Fix: narrow the bare except; only a malformed date is expected here.
            except ValueError:
                print("wrong date format, need YYYY-MM-DD")
                raise
            else:
                months.append(dt)
    else:
        # Default window: configured start through yesterday.
        months = [self.start_time, (date.today() - timedelta(days=1))]

    # Fix: removed unused locals ``all_data`` and ``quantize``.
    for ticker in NegotiablePaper.objects.all():
        data_series = web.get_data_google(ticker.name, *months)
        cache_gen = [DataCache(paper=ticker, date=ind,
                               price=self.get_price(data_series.Close[ind]))
                     for ind in data_series.index]
        DataCache.objects.bulk_create(cache_gen)
def test_dtypes(self):
    # GH3995 -- every OHLCV column must come back with a numeric dtype
    data = web.get_data_google('MSFT', 'JAN-01-12', 'JAN-31-12')
    for column in ('Open', 'Close', 'Low', 'High', 'Volume'):
        assert np.issubdtype(data[column].dtype, np.number)
def test_dtypes(self):
    # GH3995, GH8980 -- all price/volume columns must be numeric dtypes
    data = web.get_data_google('F', start='JAN-01-10', end='JAN-27-13')
    for column in ('Open', 'Close', 'Low', 'High', 'Volume'):
        self.assertTrue(np.issubdtype(data[column].dtype, np.number))
def get_time_series(comm_id):
    """Fetch one year of closes for *comm_id*; return them (and their
    quantiles) as JSON strings in a dict."""
    today = datetime.date.today()
    one_year_ago = today - datetime.timedelta(364)
    frame = web.get_data_google(comm_id, one_year_ago, today)
    # String-format the date index so the JSON keys are readable dates.
    frame.index = frame.index.strftime('%Y-%m-%d')
    closes = frame.Close
    return {
        'ts': closes.to_json(),
        'quantiles': closes.quantile([.1, .25, .5, .75, .9]).to_json(),
    }
def test_get_multi1(self):
    """Multi-symbol fetch is locale independent; AAPL first closes under
    GOOG on day-of-year 3 of 2012."""
    symbols = ['AAPL', 'AMZN', 'GOOG']
    for locale in self.locales:
        with tm.set_locale(locale):
            pan = web.get_data_google(symbols, '2012')
        ts = pan.Close.GOOG.index[pan.Close.AAPL < pan.Close.GOOG]
        if hasattr(pan, 'Close') and hasattr(pan.Close, 'GOOG') \
                and hasattr(pan.Close, 'AAPL'):
            self.assertEqual(ts[0].dayofyear, 3)
        else:
            self.assertRaises(AttributeError, lambda: pan.Close)
def test_get_multi1(self):
    """Like the year-only variant but with an explicit 2012-2013 window."""
    symbols = ['AAPL', 'AMZN', 'GOOG']
    for locale in self.locales:
        with tm.set_locale(locale):
            pan = web.get_data_google(symbols, '2012', '2013')
        ts = pan.Close.GOOG.index[pan.Close.AAPL < pan.Close.GOOG]
        if hasattr(pan, 'Close') and hasattr(pan.Close, 'GOOG') \
                and hasattr(pan.Close, 'AAPL'):
            self.assertEqual(ts[0].dayofyear, 3)
        else:
            self.assertRaises(AttributeError, lambda: pan.Close)
def test_get_multi1(self):
    """Multi-symbol fetch: first 2012 day AAPL closed above GOOG is day 96."""
    sl = ["AAPL", "AMZN", "GOOG"]
    pan = web.get_data_google(sl, "2012")

    def testit():
        ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG]
        # Fix: assertEquals is a deprecated unittest alias of assertEqual.
        self.assertEqual(ts[0].dayofyear, 96)

    if hasattr(pan, "Close") and hasattr(pan.Close, "GOOG") \
            and hasattr(pan.Close, "AAPL"):
        testit()
    else:
        self.assertRaises(AttributeError, testit)
def test_get_multi2(self):
    """Panel fetch for three symbols; failed symbols must equal null columns."""
    with warnings.catch_warnings(record=True) as caught:
        panel = web.get_data_google(["GE", "MSFT", "INTC"],
                                    "JAN-01-12", "JAN-31-12")
        closes = panel.Close.ix["01-18-12"]
        assert_n_failed_equals_n_null_columns(caught, closes)
        # sanity checking
        assert np.issubdtype(closes.dtype, np.floating)
        opens = panel.Open.ix["Jan-15-12":"Jan-20-12"]
        self.assertEqual((4, 3), opens.shape)
        assert_n_failed_equals_n_null_columns(caught, opens)
def test_get_multi1(self):
    """Multi-symbol fetch: first 2012 day AAPL closed above GOOG is day 96."""
    sl = ['AAPL', 'AMZN', 'GOOG']
    pan = web.get_data_google(sl, '2012')

    def testit():
        ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG]
        # Fix: assertEquals is a deprecated unittest alias of assertEqual.
        self.assertEqual(ts[0].dayofyear, 96)

    if (hasattr(pan, 'Close') and hasattr(pan.Close, 'GOOG')
            and hasattr(pan.Close, 'AAPL')):
        testit()
    else:
        self.assertRaises(AttributeError, testit)
def test_get_multi2(self):
    """Three-symbol fetch; count of failed symbols must match null columns."""
    with warnings.catch_warnings(record=True) as caught:
        panel = web.get_data_google(['GE', 'MSFT', 'INTC'],
                                    'JAN-01-12', 'JAN-31-12')
        closes = panel.Close.ix['01-18-12']
        assert_n_failed_equals_n_null_columns(caught, closes)
        # sanity checking
        assert np.issubdtype(closes.dtype, np.floating)
        opens = panel.Open.ix['Jan-15-12':'Jan-20-12']
        self.assertEqual((4, 3), opens.shape)
        assert_n_failed_equals_n_null_columns(caught, opens)
def test_get_multi2(self):
    """Shape/dtype sanity checks on a three-symbol January 2012 panel."""
    pan = web.get_data_google(['GE', 'MSFT', 'INTC'], 'JAN-01-12', 'JAN-31-12')
    result = pan.Close.ix['01-18-12']
    self.assertEqual(len(result), 3)

    # sanity checking
    assert np.issubdtype(result.dtype, np.floating)

    expected = np.array([[18.99, 28.4, 25.18],
                         [18.58, 28.31, 25.13],
                         [19.03, 28.16, 25.52],
                         [18.81, 28.82, 25.87]])
    result = pan.Open.ix['Jan-15-12':'Jan-20-12']
    # Fix: ``expected`` is already an ndarray -- the extra np.array() wrap
    # was redundant.
    self.assertEqual(expected.shape, result.shape)
def test_get_multi2(self):
    """Exact close/open values for GE/MSFT/INTC over January 2012."""
    tickers = ['GE', 'MSFT', 'INTC']
    pan = web.get_data_google(tickers, 'JAN-01-12', 'JAN-31-12')

    expected = [19.02, 28.23, 25.39]
    result = pan.Close.ix['01-18-12'][tickers].tolist()
    assert result == expected

    # sanity checking
    arr = np.array(result)
    assert np.issubdtype(arr.dtype, np.floating)
    assert arr.shape == (3,)

    expected = [[18.99, 28.4, 25.18],
                [18.58, 28.31, 25.13],
                [19.03, 28.16, 25.52],
                [18.81, 28.82, 25.87]]
    result = pan.Open.ix['Jan-15-12':'Jan-20-12'][tickers].values
    assert (result == expected).all()

    # sanity checking
    arr = np.array(pan)
    assert np.issubdtype(arr.dtype, np.floating)
def test_get_multi_invalid(self):
    """An unknown ticker raises SymbolWarning yet stays in the minor axis."""
    symbols = ['AAPL', 'AMZN', 'INVALID']
    with tm.assert_produces_warning(SymbolWarning):
        pan = web.get_data_google(symbols, '2012')
        self.assertIn('INVALID', pan.minor_axis)
def test_get_goog_volume(self):
    """GOOG's 2010-10-08 volume is the same in every tested locale."""
    for locale in self.locales:
        with tm.set_locale(locale):
            frame = web.get_data_google('GOOG').sort_index()
        self.assertEqual(frame.Volume.ix['OCT-08-2010'], 2863473)
def downloadQuotes(tickers, date1=None, date2=None, adjust=True, Verbose=False):
    """Return historical quotes for *tickers* as a pandas DataFrame.

    Quotes are fetched from Yahoo!; after 11 consecutive failures the
    code falls back to Google.  A failing ticker is re-queued at the end
    of the list and retried (up to ~20 attempts), sleeping 3s between
    attempts.

    Parameters
    ----------
    tickers : sequence
        Sequence of string tickers, e.g. ['aapl', 'msft'].
        NOTE(review): the list is mutated in place when a ticker is
        re-queued for retry.
    date1 : {datetime.date, tuple}, optional
        First date to grab quotes for.  Defaults to (1900, 1, 1).
    date2 : {datetime.date, tuple}, optional
        Last date to grab quotes for.  Defaults to 10 days past today.
    adjust : bool, optional
        Documented as toggling price adjustment; NOTE(review): this body
        never reads it -- the 'Adj Close' (Yahoo) or 'Close' (Google)
        column is always used.
    Verbose : bool, optional
        Print each ticker as it is loaded.  Default False.

    Returns
    -------
    quotes_df : DataFrame
        One column per successfully loaded ticker, indexed by date and
        sorted.  If no ticker loads, a placeholder frame with a single
        'None' column is returned instead.
    """
    from time import sleep
    from matplotlib.finance import *
    #from la.external.matplotlib import quotes_historical_yahoo
    import pandas as pd
    from pandas.io.data import DataReader
    from pandas.io.data import get_data_yahoo, get_data_google
    #import la

    # Default window: essentially "everything, up to a bit beyond today".
    if date1 is None:
        date1 = datetime.date(1900, 1, 1)
    if date2 is None:
        date2 = datetime.date.today() + datetime.timedelta(+10)

    #quotes_df = None
    #lar = None
    items = ['Adj Close']        # column kept from Yahoo! results
    google_items = ['Close']     # column kept from Google results
    if Verbose:
        print "Load data"

    i = 0                # count of tickers successfully processed
    number_tries = 0     # consecutive failures; >= 11 switches to Google
    re_tries = 0         # total re-queues; used to detect the first success
    for itick, ticker in enumerate(tickers):
        if Verbose:
            print "\t" + ticker + " ",
        data = []
        dates = []
        #number_tries = 0
        try:
            # read in dataframe containing adjusted close quotes for a
            # ticker in the list
            if number_tries < 11:
                try:
                    data = get_data_yahoo(ticker, start=date1, end=date2)[items]
                    number_tries = 0
                except:
                    pass
            else:
                # Too many Yahoo! failures in a row -- fall back to Google.
                print " ...retrieving quotes using google"
                try:
                    data = get_data_google(ticker, start=date1, end=date2)[google_items]
                    number_tries = 0
                except:
                    pass
            dates = data.index
            dates = [d.to_datetime() for d in dates]
            data.columns = [ticker]
            if Verbose:
                print i, " of ", len(tickers), " ticker ", ticker, " has ", data.shape[0], " quotes"
            if itick - re_tries == 0:
                # First successful ticker seeds the result frame...
                quotes_df = data
            else:
                # ...subsequent tickers are outer-joined on the date index.
                quotes_df = quotes_df.join(data, how='outer')
            i += 1
        except:
            # NOTE(review): bare except hides the real error; the ticker is
            # re-queued at the end of the list and retried after a pause.
            print "could not get quotes for ", ticker, " will try again and again.", number_tries
            sleep(3)
            number_tries += 1
            re_tries += 1
            if number_tries < 20:
                tickers[itick + 1:itick + 1] = [ticker]
    print "number of tickers successfully processed = ", i
    if i > 0:
        quotes_df.sort_index(axis=0, inplace=True)
        return quotes_df
    else:
        # return empty DataFrame
        quotes_df = pd.DataFrame([0, 0], ['Dates', date2])
        quotes_df.columns = ['None']
        return quotes_df
def get_data(tickerList, fromDate, toDate):
    """Download google quotes (with return index) for *tickerList* over
    [fromDate, toDate]."""
    return web.get_data_google(tickerList, fromDate, toDate, ret_index=True)
def data_web_import_view(request, symbol=''):
    """
    Backfill Stock rows for *symbol* from the Google and Yahoo quote feeds.

    Requires at least two 'tos_thinkback' Stock rows for the symbol; their
    first/last dates bound the web fetch window.  Dates already stored for a
    source are skipped, zero-volume rows are ignored, and the new rows are
    bulk-inserted.

    :param request: request
    :param symbol: str
    :return: render
    """
    # noinspection PyShadowingNames
    def create_stock(symbol, index, data, source):
        """
        Build (unsaved) a Stock row from one quote record.

        :param symbol: str
        :param index: datetime -- the quote date
        :param data: dict-like row with Open/High/Low/Close/Volume
        :param source: str -- 'google' or 'yahoo'
        :return: Stock
        """
        return Stock(
            symbol=symbol,
            date=index.strftime('%Y-%m-%d'),
            open=data['Open'],
            high=data['High'],
            low=data['Low'],
            close=data['Close'],
            volume=data['Volume'],
            source=source
        )

    template = 'data/run_web.html'

    if symbol:
        symbol = symbol.upper()
        stocks = Stock.objects.filter(symbol=symbol)
        if stocks.exists():
            tos_thinkback = stocks.filter(source='tos_thinkback').order_by('date')
            if tos_thinkback.exists() and tos_thinkback.count() > 1:
                # Thinkback data defines the date window fetched from the web.
                tb_first_date = tos_thinkback.first().date
                tb_last_date = tos_thinkback.last().date
            else:
                raise LookupError(
                    '< {symbol} > No enough stock data from source tos_thinkback.'.format(
                        symbol=symbol
                    )
                )

            web_stocks = stocks.filter(Q(source='google') | Q(source='yahoo')).order_by('date')

            new_stocks = list()
            if web_stocks.exists():
                # Some web rows already exist: fetch the full window but only
                # insert dates not yet stored for each source.
                google_dates = [stock.date.strftime('%Y-%m-%d')
                                for stock in web_stocks.filter(source='google')]
                yahoo_dates = [stock.date.strftime('%Y-%m-%d')
                               for stock in web_stocks.filter(source='yahoo')]

                google_data = get_data_google(
                    symbols=symbol,
                    # start='2015-04-01', end='2015-04-10',  # test only
                    start=tb_first_date,
                    end=tb_last_date,
                    adjust_price=True
                )
                for index, data in google_data.iterrows():
                    if index.strftime('%Y-%m-%d') not in google_dates:
                        # not found for google, insert db
                        if int(data['Volume']) > 0:
                            stock = create_stock(symbol, index, data, 'google')
                            new_stocks.append(stock)

                yahoo_data = get_data_yahoo(
                    symbols=symbol,
                    # start='2015-04-01', end='2015-04-10',  # test only
                    start=tb_first_date,
                    end=tb_last_date,
                    adjust_price=True
                )
                for index, data in yahoo_data.iterrows():
                    if index.strftime('%Y-%m-%d') not in yahoo_dates:
                        # not found for yahoo, insert db
                        if int(data['Volume']) > 0:
                            stock = create_stock(symbol, index, data, 'yahoo')
                            new_stocks.append(stock)
            else:
                # No web rows yet: import every non-zero-volume row, google
                google_data = get_data_google(
                    symbols=symbol,
                    # start='2015-04-01', end='2015-04-10',  # test only
                    start=tb_first_date,
                    end=tb_last_date,
                    adjust_price=True
                )
                for index, data in google_data.iterrows():
                    if int(data['Volume']) > 0:
                        stock = create_stock(symbol, index, data, 'google')
                        new_stocks.append(stock)

                # import all data, yahoo
                yahoo_data = get_data_yahoo(
                    symbols=symbol,
                    # start='2015-04-01', end='2015-04-10',  # test only
                    start=tb_first_date,
                    end=tb_last_date,
                    adjust_price=True
                )
                for index, data in yahoo_data.iterrows():
                    if int(data['Volume']) > 0:
                        stock = create_stock(symbol, index, data, 'yahoo')
                        new_stocks.append(stock)

            # bulk insert
            if len(new_stocks):
                Stock.objects.bulk_create(new_stocks)
        else:
            raise ObjectDoesNotExist(
                "No stock data on < {symbol} >, run tos_thinkback before get google".format(
                    symbol=symbol
                )
            )
    else:
        raise ValueError('Symbol is blank.')

    # reset
    # Stock.objects.filter(symbol=symbol).filter(source='google').delete()

    parameters = dict(
        symbol=symbol,
        stocks=new_stocks
    )

    return render(request, template, parameters)
def test_get_goog_volume(self):
    """Spot-check GOOG's reported volume on 2010-10-08."""
    frame = web.get_data_google('GOOG')
    self.assertEqual(frame.Volume.ix['OCT-08-2010'], 2863473)
def test_get_goog_volume(self):
    """GOOG's 2015-01-02 volume is the same in every tested locale."""
    for locale in self.locales:
        with tm.set_locale(locale):
            frame = web.get_data_google('GOOG').sort_index()
        self.assertEqual(frame.Volume.ix['JAN-02-2015'], 1446662)
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 15 17:31:39 2017
Examples from the Book Python for Data Analysis, on Data Frames
@author: xuehuachen
"""
from pandas import DataFrame
import pandas as pd
import pandas.io.data as web

# Pull daily quotes for each ticker into a dict of DataFrames.
all_data = {ticker: web.get_data_google(ticker)
            for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']}

price = DataFrame({tic: data['Close'] for tic, data in all_data.iteritems()})
volume = DataFrame({tic: data['Volume'] for tic, data in all_data.iteritems()})
returns = price.pct_change()

# Correlation and Covariance
returns.MSFT.corr(returns.IBM)
returns.MSFT.cov(returns.IBM)
returns.corr()
returns.cov()
returns.corrwith(returns.IBM)
returns.corrwith(volume)
import pandas.io.data as web
import datetime
import pandas
import simplejson

start = datetime.datetime(2015, 1, 20)
end = datetime.datetime(2015, 1, 27)
datetime.date.today() - datetime.timedelta(364)

f = web.get_data_google("cow", start, end)
g = web.Options('cow', 'yahoo')


def get_time_series(comm_id):
    """Return a year of closing prices (and their quantiles) for *comm_id*
    as JSON strings in a dict."""
    today = datetime.date.today()
    year_ago = today - datetime.timedelta(364)
    frame = web.get_data_google(comm_id, year_ago, today)
    frame.index = frame.index.strftime('%Y-%m-%d')
    return {
        'ts': frame.Close.to_json(),
        'quantiles': frame.Close.quantile([.1, .25, .5, .75, .9]).to_json(),
    }


def intuit_to_commid(intuitid):
    """Map an intuit id to a commodity id (falls through to None when
    the id is unknown)."""
    if intuitid == "beef":
        return "cow"
    if intuitid == "corn":
        return intuitid
def test_get_multi_invalid(self):
    """An invalid symbol is still kept in the panel's minor axis."""
    pan = web.get_data_google(['AAPL', 'AMZN', 'INVALID'], '2012')
    self.assertIn('INVALID', pan.minor_axis)
def test_get_goog_volume(self):
    """Known volume figure for GOOG on 2010-10-08."""
    frame = web.get_data_google('GOOG')
    assert frame.Volume.ix['OCT-08-2010'] == 2863473
def test_unicode_date(self):
    # GH8967 -- the returned datetime index must be named 'Date'
    frame = web.get_data_google('F', start='JAN-01-10', end='JAN-27-13')
    self.assertEqual(frame.index.name, 'Date')
index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns=[['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']]) frame.index.names = ['key1', 'key2'] frame.columns.names = ['state', 'color'] frame.sum(level="color", axis=1) frame.sum(level="state", axis=1) ser = Series(np.arange(3.), index=list("abc")) ser ser[-1] ser.iget_value(2) import pandas.io.data as web pdata = pd.Panel( dict((stk, web.get_data_google(stk, "1/1/2012", "12/30/2014")) for stk in ["AAPL", "GOOG", "MSFT", "DELL"])) pdata pdata = pdata.swapaxes("items", "minor") pdata pdata.ix[:, "12/3/2012", :] import statsmodels.api as sm import numpy as np import matplotlib.pyplot as plt nsample = 100 x = np.linspace(0, 10, 100) x X = np.column_stack((x, x**2)) X X = sm.add_constant(X)
def data_web_import_view(request, symbol=''):
    """
    Backfill Stock rows for *symbol* from the Google and Yahoo quote feeds.

    Requires at least two 'tos_thinkback' Stock rows for the symbol; their
    first/last dates bound the web fetch window.  Dates already stored for a
    source are skipped, zero-volume rows are ignored, and the new rows are
    bulk-inserted.

    :param request: request
    :param symbol: str
    :return: render
    """
    # noinspection PyShadowingNames
    def create_stock(symbol, index, data, source):
        """
        Build (unsaved) a Stock row from one quote record.

        :param symbol: str
        :param index: datetime -- the quote date
        :param data: dict-like row with Open/High/Low/Close/Volume
        :param source: str -- 'google' or 'yahoo'
        :return: Stock
        """
        return Stock(symbol=symbol,
                     date=index.strftime('%Y-%m-%d'),
                     open=data['Open'],
                     high=data['High'],
                     low=data['Low'],
                     close=data['Close'],
                     volume=data['Volume'],
                     source=source)

    template = 'data/run_web.html'

    if symbol:
        symbol = symbol.upper()
        stocks = Stock.objects.filter(symbol=symbol)
        if stocks.exists():
            tos_thinkback = stocks.filter(
                source='tos_thinkback').order_by('date')
            if tos_thinkback.exists() and tos_thinkback.count() > 1:
                # Thinkback data defines the date window fetched from the web.
                tb_first_date = tos_thinkback.first().date
                tb_last_date = tos_thinkback.last().date
            else:
                raise LookupError(
                    '< {symbol} > No enough stock data from source tos_thinkback.'
                    .format(symbol=symbol))

            web_stocks = stocks.filter(Q(source='google')
                                       | Q(source='yahoo')).order_by('date')

            new_stocks = list()
            if web_stocks.exists():
                # Some web rows already exist: fetch the full window but only
                # insert dates not yet stored for each source.
                google_dates = [
                    stock.date.strftime('%Y-%m-%d')
                    for stock in web_stocks.filter(source='google')
                ]
                yahoo_dates = [
                    stock.date.strftime('%Y-%m-%d')
                    for stock in web_stocks.filter(source='yahoo')
                ]

                google_data = get_data_google(
                    symbols=symbol,
                    # start='2015-04-01', end='2015-04-10',  # test only
                    start=tb_first_date,
                    end=tb_last_date,
                    adjust_price=True)
                for index, data in google_data.iterrows():
                    if index.strftime('%Y-%m-%d') not in google_dates:
                        # not found for google, insert db
                        if int(data['Volume']) > 0:
                            stock = create_stock(symbol, index, data, 'google')
                            new_stocks.append(stock)

                yahoo_data = get_data_yahoo(
                    symbols=symbol,
                    # start='2015-04-01', end='2015-04-10',  # test only
                    start=tb_first_date,
                    end=tb_last_date,
                    adjust_price=True)
                for index, data in yahoo_data.iterrows():
                    if index.strftime('%Y-%m-%d') not in yahoo_dates:
                        # not found for yahoo, insert db
                        if int(data['Volume']) > 0:
                            stock = create_stock(symbol, index, data, 'yahoo')
                            new_stocks.append(stock)
            else:
                # No web rows yet: import every non-zero-volume row, google
                google_data = get_data_google(
                    symbols=symbol,
                    # start='2015-04-01', end='2015-04-10',  # test only
                    start=tb_first_date,
                    end=tb_last_date,
                    adjust_price=True)
                for index, data in google_data.iterrows():
                    if int(data['Volume']) > 0:
                        stock = create_stock(symbol, index, data, 'google')
                        new_stocks.append(stock)

                # import all data, yahoo
                yahoo_data = get_data_yahoo(
                    symbols=symbol,
                    # start='2015-04-01', end='2015-04-10',  # test only
                    start=tb_first_date,
                    end=tb_last_date,
                    adjust_price=True)
                for index, data in yahoo_data.iterrows():
                    if int(data['Volume']) > 0:
                        stock = create_stock(symbol, index, data, 'yahoo')
                        new_stocks.append(stock)

            # bulk insert
            if len(new_stocks):
                Stock.objects.bulk_create(new_stocks)
        else:
            raise ObjectDoesNotExist(
                "No stock data on < {symbol} >, run tos_thinkback before get google"
                .format(symbol=symbol))
    else:
        raise ValueError('Symbol is blank.')

    # reset
    # Stock.objects.filter(symbol=symbol).filter(source='google').delete()

    parameters = dict(symbol=symbol, stocks=new_stocks)

    return render(request, template, parameters)
def data_daily_import_view(request):
    """
    Import all csv files in daily folder then insert web data for that date

    For each '<date>-StockAndOptionQuoteFor<symbol>.csv' found in the _daily
    folder: parse it, save the thinkback stock/contract/option rows, archive
    the file into the per-symbol/year folder, then fetch the same date's
    quote from Google and Yahoo and save those too (zero-volume rows are
    skipped).

    :param request: request
    :return: render
    """
    template = 'data/daily.html'
    insert_files = list()

    files = [
        path for path in glob(os.path.join(THINKBACK_DIR, '_daily', '*.csv'))
    ]
    for f in files:
        contracts = 0
        options = 0

        # get filename and dir; the name encodes both the date and the symbol
        filename = os.path.basename(f)
        print 'running file: %s...' % filename
        date, symbol = map(
            lambda x: x.upper(),
            filename[:-4].split('-StockAndOptionQuoteFor')
        )

        # file into dict
        stock_data, option_data = OpenThinkBack(date=date, data=open(f).read()).format()

        # save stock
        stock = Stock()
        stock.symbol = symbol
        stock.source = 'tos_thinkback'
        stock.data = stock_data
        stock.save()

        # save contract and option; create the contract row only when it
        # does not already exist for this option code
        for contract_dict, option_dict in option_data:
            try:
                contract = OptionContract.objects.get(option_code=contract_dict['option_code'])
            except ObjectDoesNotExist:
                contract = OptionContract()
                contract.symbol = symbol
                contract.source = 'tos_thinkback'
                contract.data = contract_dict
                contract.save()
                contracts += 1

            option = Option()
            option.option_contract = contract
            option.data = option_dict
            option.save()
            options += 1

        # move file into folder <THINKBACK_DIR>/<symbol>/<year>/
        year = filename[:4]
        year_dir = os.path.join(THINKBACK_DIR, symbol, year)

        # make dir if not exists
        if not os.path.isdir(year_dir):
            os.mkdir(year_dir)
        os.rename(f, os.path.join(year_dir, os.path.basename(f)))

        # save data from web (single-day window: start == end == date)
        google_data = get_data_google(
            symbols=symbol,
            # start='2015-04-01', end='2015-04-10',  # test only
            start=date,
            end=date,
            adjust_price=True
        )
        yahoo_data = get_data_yahoo(
            symbols=symbol,
            # start='2015-04-01', end='2015-04-10',  # test only
            start=date,
            end=date,
            adjust_price=True
        )

        for index, data in google_data.iterrows():
            if int(data['Volume']) > 0:
                google_stock = Stock(
                    symbol=symbol,
                    date=index.strftime('%Y-%m-%d'),
                    open=data['Open'],
                    high=data['High'],
                    low=data['Low'],
                    close=data['Close'],
                    volume=data['Volume'],
                    source='google'
                )
                google_stock.save()

        for index, data in yahoo_data.iterrows():
            if int(data['Volume']) > 0:
                yahoo_stock = Stock(
                    symbol=symbol,
                    date=index.strftime('%Y-%m-%d'),
                    open=data['Open'],
                    high=data['High'],
                    low=data['Low'],
                    close=data['Close'],
                    volume=data['Volume'],
                    source='yahoo'
                )
                yahoo_stock.save()

        insert_files.append(
            dict(
                symbol=symbol,
                date=date,
                path=filename,
                stock=1,
                contracts=contracts,
                options=options
            )
        )

    parameters = dict(
        insert_files=insert_files
    )

    return render(request, template, parameters)
def data_daily_import_view(request):
    """
    Import all csv files in daily folder then insert web data for that date

    For each '<date>-StockAndOptionQuoteFor<symbol>.csv' found in the _daily
    folder: parse it, save the thinkback stock/contract/option rows, archive
    the file into the per-symbol/year folder, then fetch the same date's
    quote from Google and Yahoo and save those too (zero-volume rows are
    skipped).

    :param request: request
    :return: render
    """
    template = 'data/daily.html'
    insert_files = list()

    files = [
        path for path in glob(os.path.join(THINKBACK_DIR, '_daily', '*.csv'))
    ]
    for f in files:
        contracts = 0
        options = 0

        # get filename and dir; the name encodes both the date and the symbol
        filename = os.path.basename(f)
        print 'running file: %s...' % filename
        date, symbol = map(lambda x: x.upper(),
                           filename[:-4].split('-StockAndOptionQuoteFor'))

        # file into dict
        stock_data, option_data = OpenThinkBack(date=date,
                                                data=open(f).read()).format()

        # save stock
        stock = Stock()
        stock.symbol = symbol
        stock.source = 'tos_thinkback'
        stock.data = stock_data
        stock.save()

        # save contract and option; create the contract row only when it
        # does not already exist for this option code
        for contract_dict, option_dict in option_data:
            try:
                contract = OptionContract.objects.get(
                    option_code=contract_dict['option_code'])
            except ObjectDoesNotExist:
                contract = OptionContract()
                contract.symbol = symbol
                contract.source = 'tos_thinkback'
                contract.data = contract_dict
                contract.save()
                contracts += 1

            option = Option()
            option.option_contract = contract
            option.data = option_dict
            option.save()
            options += 1

        # move file into folder <THINKBACK_DIR>/<symbol>/<year>/
        year = filename[:4]
        year_dir = os.path.join(THINKBACK_DIR, symbol, year)

        # make dir if not exists
        if not os.path.isdir(year_dir):
            os.mkdir(year_dir)
        os.rename(f, os.path.join(year_dir, os.path.basename(f)))

        # save data from web (single-day window: start == end == date)
        google_data = get_data_google(
            symbols=symbol,
            # start='2015-04-01', end='2015-04-10',  # test only
            start=date,
            end=date,
            adjust_price=True)
        yahoo_data = get_data_yahoo(
            symbols=symbol,
            # start='2015-04-01', end='2015-04-10',  # test only
            start=date,
            end=date,
            adjust_price=True)

        for index, data in google_data.iterrows():
            if int(data['Volume']) > 0:
                google_stock = Stock(symbol=symbol,
                                     date=index.strftime('%Y-%m-%d'),
                                     open=data['Open'],
                                     high=data['High'],
                                     low=data['Low'],
                                     close=data['Close'],
                                     volume=data['Volume'],
                                     source='google')
                google_stock.save()

        for index, data in yahoo_data.iterrows():
            if int(data['Volume']) > 0:
                yahoo_stock = Stock(symbol=symbol,
                                    date=index.strftime('%Y-%m-%d'),
                                    open=data['Open'],
                                    high=data['High'],
                                    low=data['Low'],
                                    close=data['Close'],
                                    volume=data['Volume'],
                                    source='yahoo')
                yahoo_stock.save()

        insert_files.append(
            dict(symbol=symbol,
                 date=date,
                 path=filename,
                 stock=1,
                 contracts=contracts,
                 options=options))

    parameters = dict(insert_files=insert_files)

    return render(request, template, parameters)
def test_get_multi1(self):
    """First 2012 session where AAPL closed above GOOG is day-of-year 96."""
    pan = web.get_data_google(['AAPL', 'AMZN', 'GOOG'], '2012')
    crossover = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG]
    assert crossover[0].dayofyear == 96
def downloadQuotes(tickers, date1=None, date2=None, adjust=True, Verbose=False):
    """Return historical quotes for *tickers* as a pandas DataFrame.

    Quotes are fetched from Yahoo!; after 11 consecutive failures the
    code falls back to Google.  A failing ticker is re-queued at the end
    of the list and retried (up to ~20 attempts), sleeping 3s between
    attempts.

    Parameters
    ----------
    tickers : sequence
        Sequence of string tickers, e.g. ['aapl', 'msft'].
        NOTE(review): the list is mutated in place when a ticker is
        re-queued for retry.
    date1 : {datetime.date, tuple}, optional
        First date to grab quotes for.  Defaults to (1900, 1, 1).
    date2 : {datetime.date, tuple}, optional
        Last date to grab quotes for.  Defaults to 10 days past today.
    adjust : bool, optional
        Documented as toggling price adjustment; NOTE(review): this body
        never reads it -- the 'Adj Close' (Yahoo) or 'Close' (Google)
        column is always used.
    Verbose : bool, optional
        Print each ticker as it is loaded.  Default False.

    Returns
    -------
    quotes_df : DataFrame
        One column per successfully loaded ticker, indexed by date and
        sorted.  If no ticker loads, a placeholder frame with a single
        'None' column is returned instead.
    """
    from time import sleep
    from matplotlib.finance import *
    #from la.external.matplotlib import quotes_historical_yahoo
    import pandas as pd
    from pandas.io.data import DataReader
    from pandas.io.data import get_data_yahoo, get_data_google
    #import la

    # Default window: essentially "everything, up to a bit beyond today".
    if date1 is None:
        date1 = datetime.date(1900, 1, 1)
    if date2 is None:
        date2 = datetime.date.today() + datetime.timedelta(+10)

    #quotes_df = None
    #lar = None
    items = ['Adj Close']        # column kept from Yahoo! results
    google_items = ['Close']     # column kept from Google results
    if Verbose:
        print "Load data"

    i = 0                # count of tickers successfully processed
    number_tries = 0     # consecutive failures; >= 11 switches to Google
    re_tries = 0         # total re-queues; used to detect the first success
    for itick, ticker in enumerate(tickers):
        if Verbose:
            print "\t" + ticker + " ",
        data = []
        dates = []
        #number_tries = 0
        try:
            # read in dataframe containing adjusted close quotes for a
            # ticker in the list
            if number_tries < 11:
                try:
                    data = get_data_yahoo(ticker, start=date1, end=date2)[items]
                    number_tries = 0
                except:
                    pass
            else:
                # Too many Yahoo! failures in a row -- fall back to Google.
                print " ...retrieving quotes using google"
                try:
                    data = get_data_google(ticker, start=date1, end=date2)[google_items]
                    number_tries = 0
                except:
                    pass
            dates = data.index
            dates = [d.to_datetime() for d in dates]
            data.columns = [ticker]
            if Verbose:
                print i, " of ", len(tickers), " ticker ", ticker, " has ", data.shape[0], " quotes"
            if itick - re_tries == 0:
                # First successful ticker seeds the result frame...
                quotes_df = data
            else:
                # ...subsequent tickers are outer-joined on the date index.
                quotes_df = quotes_df.join(data, how='outer')
            i += 1
        except:
            # NOTE(review): bare except hides the real error; the ticker is
            # re-queued at the end of the list and retried after a pause.
            print "could not get quotes for ", ticker, " will try again and again.", number_tries
            sleep(3)
            number_tries += 1
            re_tries += 1
            if number_tries < 20:
                tickers[itick + 1:itick + 1] = [ticker]
    print "number of tickers successfully processed = ", i
    if i > 0:
        quotes_df.sort_index(axis=0, inplace=True)
        return quotes_df
    else:
        # return empty DataFrame
        quotes_df = pd.DataFrame([0, 0], ['Dates', date2])
        quotes_df.columns = ['None']
        return quotes_df
def test_get_goog_volume(self):
    """GOOG traded 2,863,473 shares on 2010-10-08."""
    frame = web.get_data_google("GOOG")
    self.assertEqual(frame.Volume.ix["OCT-08-2010"], 2863473)