def get_data(self, sids, start, end):
    """Load Yahoo bars for ``sids`` between ``start`` and ``end``.

    Calls ``factory.load_bars_from_yahoo`` with an empty ``indexes`` mapping,
    records the ``items`` of the loaded object on ``self.sids`` and returns
    the loaded object.
    """
    bars = factory.load_bars_from_yahoo(
        stocks=sids,
        indexes={},
        start=start,
        end=end,
    )
    self.sids = bars.items
    return bars
def run_clusters(strategy_class, clustering_tickers, cluster_num, epochs_num,
                 training_start, training_end, backtest_start, backtest_end,
                 is_graph, is_elbow):
    """Run the test given command-line args.

    Cluster the tickers, train a strategy on a cluster and run a backtest
    for each stock in that cluster.

    Args:
        strategy_class: forwarded to ``trainStrategy``.
        clustering_tickers: tickers handed to ``Manager.getRawStockDataList``.
        cluster_num: forwarded to ``createClusters``.
        epochs_num: forwarded to ``trainStrategy``.
        training_start, training_end: window used to fetch the raw data.
        backtest_start, backtest_end: window passed to
            ``load_bars_from_yahoo`` for the backtest.
        is_graph: when truthy, call ``graphClusters`` on the clusters.
        is_elbow: when truthy, call ``graphElbowMethod`` on the normalized
            data.

    Returns:
        None. The trained strategy, the current ticker and its pre-backtest
        data are published through the ``settings`` module.
    """
    print("\nGathering data...")
    # NOTE(review): 252 is forwarded as-is; presumably trading days per
    # year -- confirm against Manager.getRawStockDataList.
    ticker_list, raw_stock_data_list = Manager.getRawStockDataList(
        clustering_tickers, training_start, training_end, 252)
    normalized_stock_data_list = [
        Manager.preprocessData(raw) for raw in raw_stock_data_list
    ]

    print("\nClustering...")
    tickers, clusters = createClusters(
        ticker_list, normalized_stock_data_list, cluster_num)
    print("# of stocks: " + str(len(normalized_stock_data_list)))
    print("# of clusters: " + str(len(clusters)))
    print("")
    for t, c in zip(tickers, clusters):
        print("\tCluster: %d stocks: %s"
              % (len(c), " ".join(str(symbol) for symbol in t)))

    if is_graph:
        graphClusters(clusters)
    if is_elbow:
        graphElbowMethod(normalized_stock_data_list)

    for t, cluster in zip(tickers, clusters):
        settings.STRATEGY_OBJECT = trainStrategy(
            strategy_class, cluster, epochs_num)
        for ticker in t:
            print("Cluster: %s" % (t,))
            print("Stock: %s" % (ticker,))
            _, tmp_data = Manager.getRawStockDataList(
                [ticker], training_start, training_end, 252)
            settings.BACKTEST_STOCK = ticker
            settings.PRE_BACKTEST_DATA = tmp_data[0]
            print("Create Algorithm...")
            algo_obj = TradingAlgorithm(
                initialize=initialize, handle_data=handle_data)
            try:
                backtest_data = load_bars_from_yahoo(
                    stocks=[ticker, 'SPY'],
                    start=backtest_start,
                    end=backtest_end)
                try:
                    perf = algo_obj.run(backtest_data)
                    analyze([ticker], [perf])
                except ValueError as e:
                    print(str(e))
            except IOError:
                print("Stock Error: could not load %s from Yahoo." % (ticker,))
        # Deliberately stop after the first cluster has been backtested.
        print("Only testing one cluster for now - Done!")
        return
def test_yahoo_bars_to_panel_source(self, env=None):
    """Events from a DataPanelSource over Yahoo bars must be well formed.

    Each event has to expose every field in the check list, carry an
    integer volume, and report sids in the same repeating order as the
    requested symbols. ``env`` must provide ``asset_finder``; the ``None``
    default would fail at the symbol lookup.
    """
    symbols = ['AAPL', 'GE']
    first_day = pd.datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc)
    last_day = pd.datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc)
    panel = factory.load_bars_from_yahoo(
        stocks=symbols, indexes={}, start=first_day, end=last_day)
    required_fields = [
        'sid', 'open', 'high', 'low', 'close', 'volume', 'price'
    ]
    source = DataPanelSource(panel)

    # Resolve every symbol to an asset first, then collect the sids.
    assets = [
        env.asset_finder.lookup_symbol(symbol, as_of_date=last_day)
        for symbol in symbols
    ]
    expected_sids = cycle([asset.sid for asset in assets])

    for event in source:
        for field in required_fields:
            self.assertIn(field, event)
        self.assertTrue(isinstance(event['volume'], (integer_types)))
        self.assertEqual(next(expected_sids), event['sid'])
def test_yahoo_bars_to_panel_source(self):
    """Events from a sid-indexed DataPanelSource must be well formed.

    The Yahoo panel is copied, its items are replaced by the sids the
    asset finder maps them to, and every event is checked for the
    required fields, an integer volume and a known sid.
    """
    env = TradingEnvironment()
    finder = AssetFinder(env.engine)
    symbols = ['AAPL', 'GE']
    env.write_data(equities_identifiers=symbols)

    first_day = pd.datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc)
    last_day = pd.datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc)
    panel = factory.load_bars_from_yahoo(
        stocks=symbols, indexes={}, start=first_day, end=last_day)
    required_fields = [
        'sid', 'open', 'high', 'low', 'close', 'volume', 'price'
    ]

    # Work on a copy so the loaded panel keeps its symbol labels.
    sid_panel = panel.copy()
    sids = finder.map_identifier_index_to_sids(
        panel.items, panel.major_axis[0])
    sid_panel.items = sids

    for event in DataPanelSource(sid_panel):
        for field in required_fields:
            self.assertIn(field, event)
        self.assertTrue(isinstance(event['volume'], (integer_types)))
        self.assertTrue(event['sid'] in sids)
def test_yahoo_bars_to_panel_source(self):
    """Events from a sid-indexed DataPanelSource must be well formed.

    The Yahoo panel is copied, its items are replaced by the sids the
    asset finder maps them to, and every event is checked for the
    required fields, an integer volume and a known sid.
    """
    env = TradingEnvironment()
    finder = AssetFinder(env.engine)
    symbols = ['AAPL', 'GE']
    env.write_data(equities_identifiers=symbols)

    first_day = pd.datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc)
    last_day = pd.datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc)
    panel = factory.load_bars_from_yahoo(
        stocks=symbols, indexes={}, start=first_day, end=last_day)
    required_fields = [
        'sid', 'open', 'high', 'low', 'close', 'volume', 'price'
    ]

    # Work on a copy so the loaded panel keeps its symbol labels.
    sid_panel = panel.copy()
    sids = finder.map_identifier_index_to_sids(
        panel.items, panel.major_axis[0])
    sid_panel.items = sids

    for event in DataPanelSource(sid_panel):
        for field in required_fields:
            self.assertIn(field, event)
        self.assertTrue(isinstance(event['volume'], (integer_types)))
        self.assertTrue(event['sid'] in sids)
def prepare_data(ticker, maMethod='ema', maPeriod=20, lookAheadDays=3,
                 start='', end='', useYahoo=False):
    """Build a smoothed close/volume frame for ``ticker``.

    Args:
        ticker: symbol to load.
        maMethod: ``'ema'`` (case-insensitive) selects ``ta.EMA``; any other
            value selects ``ta.MA``.
        maPeriod: period passed to the moving-average function.
        lookAheadDays: ``periods`` argument of the ``PctChg`` column's
            ``pct_change`` call.
        start, end: ``'%Y%m%d'`` strings. An empty ``end`` means today; an
            empty ``start`` means ``365 * 5`` days before ``end``.
        useYahoo: load unadjusted bars through zipline's Yahoo loader when
            true, otherwise through ``TickerEodModel``.

    Returns:
        A DataFrame with the original close/volume, trade date, percentage
        change, the source index and the smoothed ``Close``/``Volume``.
    """
    day_format = '%Y%m%d'
    if not end:
        end = datetime.datetime.now().strftime(day_format)
    if not start:
        end_day = datetime.datetime.strptime(end, day_format)
        start = (end_day
                 + datetime.timedelta(days=-365 * 5)).strftime(day_format)

    if useYahoo:
        from zipline.utils.factory import load_bars_from_yahoo
        first = datetime.datetime.strptime(start, day_format)
        ystart = datetime.datetime(first.year, first.month, first.day,
                                   0, 0, 0, 0, pytz.utc)
        last = datetime.datetime.strptime(end, day_format)
        yend = datetime.datetime(last.year, last.month, last.day,
                                 0, 0, 0, 0, pytz.utc)
        panel = load_bars_from_yahoo(
            stocks=[ticker], start=ystart, end=yend, adjusted=False)
        stockDf = panel[ticker].reset_index()
        stockDf['TradeDate'] = stockDf['Date'].apply(
            lambda day: day.strftime('%Y%m%d'))
        stockDf = stockDf[[
            'open', 'high', 'low', 'close', 'volume', 'TradeDate'
        ]]
        # Align the Yahoo column names with the EOD model's capitalised ones.
        stockDf.rename(columns={
            'open': 'Open',
            'high': 'High',
            'low': 'Low',
            'close': 'Close',
            'volume': 'Volume'
        }, inplace=True)
    else:
        sys.path.insert(0, '/home/jinwb/code/IIA/jsforesight/datamodel')
        from TickerEodModel import TickerEodModel
        eod_model = TickerEodModel('testEventDbConfigKey')
        stockDf = eod_model.get_eod(ticker, start, end)

    emaDf = pd.DataFrame(index=stockDf.index)
    emaDf['OrigClose'] = stockDf['Close']
    emaDf['OrigVolume'] = stockDf['Volume']
    emaDf['TradeDate'] = stockDf['TradeDate']
    emaDf['PctChg'] = stockDf['Close'].pct_change(periods=lookAheadDays)
    emaDf['index'] = emaDf.index

    # One smoothing function is applied to both price and volume.
    smooth = ta.EMA if maMethod.lower() == 'ema' else ta.MA
    emaDf['Close'] = smooth(stockDf['Close'].values, maPeriod)
    emaDf['Volume'] = smooth(stockDf['Volume'].values, maPeriod)

    print(emaDf.tail(10))
    return emaDf
def loadData(startYear, endYear, stock_list, startM=1, endM=1):
    """Load Yahoo bars for ``stock_list`` and return them unchanged.

    The window runs from the first day of ``startM``/``startYear`` to the
    first day of ``endM``/``endYear``, both at midnight UTC. Whatever
    ``load_bars_from_yahoo`` returns is passed straight back; no
    conversion to DataFrames or lists happens here.
    """
    window = {
        'start': datetime(startYear, startM, 1, 0, 0, 0, 0, pytz.utc),
        'end': datetime(endYear, endM, 1, 0, 0, 0, 0, pytz.utc),
    }
    return load_bars_from_yahoo(stocks=stock_list, **window)
def load_data():
    """Load AAPL training and testing data from Yahoo! finance.

    Returns:
        ``[data_train, data_test]``: training bars cover 2010-01-01 to
        2014-01-01, testing bars cover 2014-01-01 to 2016-01-01 (UTC).
    """
    def new_year_utc(year):
        # Midnight UTC on 1 January of ``year``.
        return datetime(year, 1, 1, 0, 0, 0, 0, pytz.utc)

    windows = (
        (new_year_utc(2010), new_year_utc(2014)),  # training
        (new_year_utc(2014), new_year_utc(2016)),  # testing
    )
    datasets = []
    for window_start, window_end in windows:
        datasets.append(load_bars_from_yahoo(
            stocks=['AAPL'], start=window_start, end=window_end))
    return datasets
def load_data():
    """Return LUV and VRTX bars, using ``data.hdf`` as a local cache.

    If the cache file exists it is read and returned. Otherwise bars for
    2010-01-01 through 2014-01-02 (UTC) are downloaded from Yahoo, written
    to the cache under the key ``'data'`` and returned.
    """
    cache_path = "data.hdf"
    if not os.path.exists(cache_path):
        window_start = dt.datetime(2010, 1, 1, 0, 0, 0, 0, pytz.utc)
        window_end = dt.datetime(2014, 1, 2, 0, 0, 0, 0, pytz.utc)
        bars = load_bars_from_yahoo(
            stocks=["LUV", "VRTX"], start=window_start, end=window_end)
        bars.to_hdf(cache_path, 'data', format='fixed')
        return bars
    return pd.read_hdf(cache_path, 'data', format='fixed')
def prepare_data(tickers, start='', end=''):
    """Load adjusted Yahoo bars for ``tickers`` and drop missing data.

    Args:
        tickers: list of symbols (checked with an ``assert``).
        start, end: ``'%Y%m%d'`` strings. An empty ``end`` means today; an
            empty ``start`` means ``365 * 5`` days before ``end``.

    Returns:
        The loader's result after ``dropna()``.
    """
    assert isinstance(tickers, list)
    day_format = '%Y%m%d'
    if not end:
        end = datetime.datetime.now().strftime(day_format)
    if not start:
        end_day = datetime.datetime.strptime(end, day_format)
        start = (end_day
                 + datetime.timedelta(days=-365 * 5)).strftime(day_format)

    def utc_midnight(day_text):
        # Parse a '%Y%m%d' string into a midnight, UTC-aware datetime.
        parsed = datetime.datetime.strptime(day_text, day_format)
        return datetime.datetime(parsed.year, parsed.month, parsed.day,
                                 0, 0, 0, 0, pytz.utc)

    ystart = utc_midnight(start)
    yend = utc_midnight(end)
    bars = load_bars_from_yahoo(
        stocks=tickers, start=ystart, end=yend, adjusted=True)
    return bars.dropna()
def run_clusters(strategy_class, clustering_tickers, cluster_num, epochs_num,
                 training_start, training_end, backtest_start, backtest_end,
                 is_graph, is_elbow):
    """Run the test given command-line args.

    Cluster the tickers, train a strategy on a cluster and run a backtest
    for each stock in that cluster.

    Args:
        strategy_class: forwarded to ``trainStrategy``.
        clustering_tickers: tickers handed to ``Manager.getRawStockDataList``.
        cluster_num: forwarded to ``createClusters``.
        epochs_num: forwarded to ``trainStrategy``.
        training_start, training_end: window used to fetch the raw data.
        backtest_start, backtest_end: window passed to
            ``load_bars_from_yahoo`` for the backtest.
        is_graph: when truthy, call ``graphClusters`` on the clusters.
        is_elbow: when truthy, call ``graphElbowMethod`` on the normalized
            data.

    Returns:
        None. The trained strategy, the current ticker and its pre-backtest
        data are published through the ``settings`` module.
    """
    print("\nGathering data...")
    # NOTE(review): 252 is forwarded as-is; presumably trading days per
    # year -- confirm against Manager.getRawStockDataList.
    ticker_list, raw_stock_data_list = Manager.getRawStockDataList(
        clustering_tickers, training_start, training_end, 252)
    normalized_stock_data_list = [
        Manager.preprocessData(raw) for raw in raw_stock_data_list
    ]

    print("\nClustering...")
    tickers, clusters = createClusters(
        ticker_list, normalized_stock_data_list, cluster_num)
    print("# of stocks: " + str(len(normalized_stock_data_list)))
    print("# of clusters: " + str(len(clusters)))
    print("")
    for t, c in zip(tickers, clusters):
        print("\tCluster: %d stocks: %s"
              % (len(c), " ".join(str(symbol) for symbol in t)))

    if is_graph:
        graphClusters(clusters)
    if is_elbow:
        graphElbowMethod(normalized_stock_data_list)

    for t, cluster in zip(tickers, clusters):
        settings.STRATEGY_OBJECT = trainStrategy(
            strategy_class, cluster, epochs_num)
        for ticker in t:
            print("Cluster: %s" % (t,))
            print("Stock: %s" % (ticker,))
            _, tmp_data = Manager.getRawStockDataList(
                [ticker], training_start, training_end, 252)
            settings.BACKTEST_STOCK = ticker
            settings.PRE_BACKTEST_DATA = tmp_data[0]
            print("Create Algorithm...")
            algo_obj = TradingAlgorithm(
                initialize=initialize, handle_data=handle_data)
            try:
                backtest_data = load_bars_from_yahoo(
                    stocks=[ticker, 'SPY'],
                    start=backtest_start,
                    end=backtest_end)
                try:
                    perf = algo_obj.run(backtest_data)
                    analyze([ticker], [perf])
                except ValueError as e:
                    print(str(e))
            except IOError:
                print("Stock Error: could not load %s from Yahoo." % (ticker,))
        # Deliberately stop after the first cluster has been backtested.
        print("Only testing one cluster for now - Done!")
        return
def loadTrainingData(ticker, start, end):
    """Load bars for ``ticker`` as rows expressed relative to the open.

    The loaded data is converted with ``Manager.convertPanelToList``; each
    row is read as (open, high, low, close, volume, price). Only the first
    four values are kept, each with the row's open subtracted, so the
    first entry is ``open - open``. Volume and price are dropped.
    """
    panel = load_bars_from_yahoo(stocks=[ticker], start=start, end=end)
    rows = Manager.convertPanelToList(panel)
    relative_rows = []
    for x in rows:
        relative_rows.append([
            x[0] - x[0],  # open
            x[1] - x[0],  # high
            x[2] - x[0],  # low
            x[3] - x[0],  # close
        ])
    return relative_rows
def test_load_bars_from_yahoo(self):
    """Check the axes of the loaded data and the start/end sanity check.

    The major axis must span 1993-01-04 to 2001-12-31, every requested
    symbol must be an item, every bar field must be on the minor axis, and
    swapping start and end must raise ``AssertionError``.
    """
    symbols = ['AAPL', 'GE']
    window_start = pd.datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc)
    window_end = pd.datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc)
    panel = load_bars_from_yahoo(
        stocks=symbols, start=window_start, end=window_end)

    expected_first = pd.Timestamp('1993-01-04 00:00:00+0000')
    expected_last = pd.Timestamp('2001-12-31 00:00:00+0000')
    assert panel.major_axis[0] == expected_first
    assert panel.major_axis[-1] == expected_last

    for symbol in symbols:
        assert symbol in panel.items
    for field in ['open', 'high', 'low', 'close', 'volume', 'price']:
        assert field in panel.minor_axis

    # Reversed dates must be rejected by the loader.
    np.testing.assert_raises(
        AssertionError,
        load_bars_from_yahoo,
        stocks=symbols,
        start=window_end,
        end=window_start,
    )
def loadTrainingData(ticker, start, end):
    """Load bars for ``ticker`` and keep only open, high, low and close.

    The loaded data is converted with ``Manager.convertPanelToList``; each
    row is read as (open, high, low, close, volume, price) and trimmed to
    its first four values. Volume and price are dropped.
    """
    panel = load_bars_from_yahoo(stocks=[ticker], start=start, end=end)
    rows = Manager.convertPanelToList(panel)
    ohlc_rows = []
    for x in rows:
        ohlc_rows.append([
            x[0],  # open
            x[1],  # high
            x[2],  # low
            x[3],  # close
        ])
    return ohlc_rows
def test_yahoo_bars_to_panel_source(self):
    """Events from a DataPanelSource over Yahoo bars must be well formed.

    Each event has to expose every required field, carry an integer
    volume, and report sids in the same repeating order as the requested
    symbols.
    """
    symbols = ['AAPL', 'GE']
    first_day = pd.datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc)
    last_day = pd.datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc)
    panel = factory.load_bars_from_yahoo(
        stocks=symbols, indexes={}, start=first_day, end=last_day)
    required_fields = [
        'sid', 'open', 'high', 'low', 'close', 'volume', 'price'
    ]
    source = DataPanelSource(panel)
    expected_sids = cycle(symbols)

    for event in source:
        for field in required_fields:
            self.assertIn(field, event)
        self.assertTrue(isinstance(event['volume'], (integer_types)))
        self.assertEqual(next(expected_sids), event['sid'])
def test_yahoo_bars_to_panel_source(self):
    """Every event from the panel source must expose all bar fields.

    Also checks that the event's volume is an ``int`` or ``long``.
    """
    symbols = ['AAPL', 'GE']
    first_day = pd.datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc)
    last_day = pd.datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc)
    panel = factory.load_bars_from_yahoo(
        stocks=symbols, start=first_day, end=last_day)

    required_fields = (
        'sid', 'open', 'high', 'low', 'close', 'volume', 'price',
    )
    for event in DataPanelSource(panel):
        for field in required_fields:
            self.assertTrue(field in event)
        self.assertTrue(isinstance(event['volume'], (int, long)))
def test_yahoo_bars_to_panel_source(self):
    """Events from a DataPanelSource over Yahoo bars must be well formed.

    Each event has to expose every required field, carry an ``int`` or
    ``long`` volume, and report sids in the same repeating order as the
    requested symbols.
    """
    symbols = ['AAPL', 'GE']
    first_day = pd.datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc)
    last_day = pd.datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc)
    panel = factory.load_bars_from_yahoo(
        stocks=symbols, indexes={}, start=first_day, end=last_day)
    required_fields = [
        'sid', 'open', 'high', 'low', 'close', 'volume', 'price'
    ]
    source = DataPanelSource(panel)
    expected_sids = cycle(symbols)

    for event in source:
        for field in required_fields:
            self.assertIn(field, event)
        self.assertTrue(isinstance(event['volume'], (int, long)))
        self.assertEqual(next(expected_sids), event['sid'])
def markowitz(stocks, cash):
    """Backtest the algorithm on ``stocks`` and build the report artefacts.

    Args:
        stocks: symbols passed to ``load_bars_from_yahoo``.
        cash: forwarded to ``TradingAlgorithm`` as the ``cash`` keyword.

    Returns:
        A tuple ``(results, returns_plot, price_plot, frontier_plot,
        equalweight_returns, optimal_returns)``: the algorithm results, two
        HTML plot fragments produced by ``mpld3``, an ``HttpResponse``
        wrapping ``plot.png``, and two percentage strings.
    """
    warnings.filterwarnings("once")
    solvers.options['show_progress'] = False

    # Look back 50 business days from now.
    end = pd.Timestamp.utcnow()
    start = end - 50 * pd.tseries.offsets.BDay()
    data = load_bars_from_yahoo(stocks=stocks, start=start, end=end)

    # Instantiate and run the algorithm.
    algo = TradingAlgorithm(initialize=initialize,
                            handle_data=handle_data,
                            cash=cash)
    results = algo.run(data)

    # Portfolio value plot.
    raw_plot = results.portfolio_value.plot()
    raw_fig = raw_plot.get_figure()
    returns_plot = mpld3.fig_to_html(raw_fig)
    raw_fig.clf()

    # Stock price plot: every series is rebased so that it starts at 100.
    raw_price_data = data.loc[:, :, 'price'].pct_change(1).fillna(0).applymap(
        lambda x: x + 1).cumprod().applymap(lambda x: x * 100)
    raw_price_plot = raw_price_data.plot(figsize=(8, 5))
    raw_price_fig = raw_price_plot.get_figure()
    price_plot = mpld3.fig_to_html(raw_price_fig)
    raw_price_fig.clf()

    # Final returns. The equal-weight figure averages the final rebased
    # level over however many series were loaded, not a fixed four.
    final_levels = list(raw_price_data.tail(1).values[0])
    equalweight_returns = sum(final_levels) / len(final_levels) - 100
    equalweight_returns = '{0:.2f}%'.format(float(equalweight_returns))
    # NOTE(review): assumes a starting portfolio value of 100000 -- confirm
    # it matches the capital the algorithm actually runs with.
    optimal_returns = (results.portfolio_value.tail(1).iloc[0] - 100000) / 1000
    optimal_returns = '{0:.2f}%'.format(float(optimal_returns))

    # Efficient frontier plot: read the image and close the file promptly.
    with open("plot.png", "rb") as image_file:
        frontier_plot_data = image_file.read()
    # Serialize to an HTTP response.
    frontier_plot = HttpResponse(frontier_plot_data, content_type="image/png")

    return (results, returns_plot, price_plot, frontier_plot,
            equalweight_returns, optimal_returns)
def fetch_equities_daily(self, symbols, ohlc=False, r_type=False,
                         returns=False, **kwargs):
    """Fetch daily Yahoo data for ``symbols``.

    Args:
        symbols: list of symbols or a comma-separated string. An empty
            value returns an empty DataFrame immediately.
        ohlc: use ``load_bars_from_yahoo`` when true, otherwise
            ``load_from_yahoo``.
        r_type: pass the result through ``convert_to_r_matrix``.
        returns: replace the data with ``(data - data.shift(1)) / data``,
            back-filling missing values.
        **kwargs: forwarded to the selected loader.
    """
    if len(symbols) == 0:
        return pd.DataFrame()
    if isinstance(symbols, str):
        symbols = symbols.split(",")

    loader = load_bars_from_yahoo if ohlc else load_from_yahoo
    data = loader(stocks=symbols, **kwargs)

    # NOTE Would it work with a pandas panel ?
    if returns:
        change = data - data.shift(1)
        data = (change / data).fillna(method="bfill")
    if r_type:
        data = convert_to_r_matrix(data)
    return data
def markowitz(stocks, cash):
    """Backtest the algorithm on ``stocks`` and build the report artefacts.

    Args:
        stocks: symbols passed to ``load_bars_from_yahoo``.
        cash: forwarded to ``TradingAlgorithm`` as the ``cash`` keyword.

    Returns:
        A tuple ``(results, returns_plot, price_plot, frontier_plot,
        equalweight_returns, optimal_returns)``: the algorithm results, two
        HTML plot fragments produced by ``mpld3``, an ``HttpResponse``
        wrapping ``plot.png``, and two percentage strings.
    """
    warnings.filterwarnings("once")
    solvers.options['show_progress'] = False

    # Look back 50 business days from now.
    end = pd.Timestamp.utcnow()
    start = end - 50 * pd.tseries.offsets.BDay()
    data = load_bars_from_yahoo(stocks=stocks, start=start, end=end)

    # Instantiate and run the algorithm.
    algo = TradingAlgorithm(initialize=initialize,
                            handle_data=handle_data,
                            cash=cash)
    results = algo.run(data)

    # Portfolio value plot.
    raw_plot = results.portfolio_value.plot()
    raw_fig = raw_plot.get_figure()
    returns_plot = mpld3.fig_to_html(raw_fig)
    raw_fig.clf()

    # Stock price plot: every series is rebased so that it starts at 100.
    raw_price_data = data.loc[:, :, 'price'].pct_change(1).fillna(0).applymap(
        lambda x: x + 1).cumprod().applymap(lambda x: x * 100)
    raw_price_plot = raw_price_data.plot(figsize=(8, 5))
    raw_price_fig = raw_price_plot.get_figure()
    price_plot = mpld3.fig_to_html(raw_price_fig)
    raw_price_fig.clf()

    # Final returns. The equal-weight figure averages the final rebased
    # level over however many series were loaded, not a fixed four.
    final_levels = list(raw_price_data.tail(1).values[0])
    equalweight_returns = sum(final_levels) / len(final_levels) - 100
    equalweight_returns = '{0:.2f}%'.format(float(equalweight_returns))
    # NOTE(review): assumes a starting portfolio value of 100000 -- confirm
    # it matches the capital the algorithm actually runs with.
    optimal_returns = (results.portfolio_value.tail(1).iloc[0] - 100000) / 1000
    optimal_returns = '{0:.2f}%'.format(float(optimal_returns))

    # Efficient frontier plot: read the image and close the file promptly.
    with open("plot.png", "rb") as image_file:
        frontier_plot_data = image_file.read()
    # Serialize to an HTTP response.
    frontier_plot = HttpResponse(frontier_plot_data, content_type="image/png")

    return (results, returns_plot, price_plot, frontier_plot,
            equalweight_returns, optimal_returns)
def get_prices(symbols, dt_start, dt_end):
    """Load the Yahoo bars for the given symbols and timespan.

    Args:
        symbols [str]: The list of symbols.
        dt_start (datetime): First day of the requested timespan.
        dt_end (datetime): Last day of the requested timespan.

    Returns:
        Whatever ``load_bars_from_yahoo`` returns for the request.
        NOTE(review): the previous docstring described this as a dataframe
        of adjusted closing prices -- confirm against the loader.

    Raises:
        RemoteDataError: if the loader fails; the error is logged as a
            warning and re-raised with a message naming the likely cause.
    """
    try:
        prices = load_bars_from_yahoo(stocks=symbols, start=dt_start,
                                      end=dt_end)
    except RemoteDataError as e:
        # The trailing space keeps the two sentences from running together.
        msg = "An error occurred reading the prices for the given symbols. " \
              "Please make sure that the stock symbols are valid: {}".format(e)
        logging.getLogger().warning(msg)
        raise RemoteDataError(msg)
    return prices
def fetch_equities_daily(self, equities, ohlc=False, r_type=False,
                         returns=False, **kwargs):
    """Fetch daily Yahoo data for ``equities``.

    Args:
        equities: list of names or a comma-separated string. An empty
            value returns an empty DataFrame immediately. Each name is
            translated with ``self.datafeed.guess_name`` before loading.
        ohlc: use ``load_bars_from_yahoo`` and relabel the result's
            ``items`` when true; otherwise use ``load_from_yahoo`` and
            relabel its ``columns``.
        r_type: pass the result through ``convert_to_r_matrix``.
        returns: replace the data with ``(data - data.shift(1)) / data``,
            back-filling missing values.
        **kwargs: forwarded to the selected loader.
    """
    if len(equities) == 0:
        return pd.DataFrame()
    if isinstance(equities, str):
        equities = equities.split(',')

    symbols = []
    for equity in equities:
        symbols.append(self.datafeed.guess_name(equity))

    if ohlc:
        loader, label_axis = load_bars_from_yahoo, 'items'
    else:
        loader, label_axis = load_from_yahoo, 'columns'
    data = loader(stocks=symbols, **kwargs)
    # Label the result with the caller's names, not the guessed symbols.
    setattr(data, label_axis, equities)

    # NOTE Would it work with a pandas panel ?
    if returns:
        change = data - data.shift(1)
        data = (change / data).fillna(method='bfill')
    if r_type:
        data = convert_to_r_matrix(data)
    return data
def preproc_data():
    """Preprocess raw AAPL data into TP Matrix format.

    Loads AAPL bars for 2009-01-01 to 2016-01-01 (UTC) and builds one
    18 x 18 boolean matrix per trading day from the 231st day on. Cell
    ``[row][col]`` becomes True when, within the day range given by
    ``columns[col]``, some close changes relative to the range's first day
    by a percentage inside ``[rows[row][0], rows[row][1])``.

    ``columns`` and ``rows`` are read from the enclosing module.

    Returns:
        A ``pd.Panel`` whose items are the dates and whose values are the
        TP matrices.
    """
    # Load data manually from Yahoo! finance
    start = datetime(2009, 1, 1, 0, 0, 0, 0, pytz.utc)
    end = datetime(2016, 1, 1, 0, 0, 0, 0, pytz.utc)
    data = load_bars_from_yahoo(stocks=['AAPL'], start=start, end=end)
    aapl = data.ix['AAPL']

    # Initialize TP Matrix: (# of days - 230) x 18 x 18
    blank = np.zeros((len(aapl) - 230, 18, 18), dtype=np.bool)
    TP_matrixs = pd.Panel(blank, items=aapl.index[230:])

    # Construct TP Matrix
    for day, matrix in TP_matrixs.iteritems():
        # Extract raw close price of last 230 days
        closes_before = aapl[aapl.index < day]['close'].tolist()
        list_CP = closes_before[len(closes_before) - 230:len(closes_before)]

        # col[0, 8] for Upward TP Matrix
        # col[9, 17] for Downward TP Matrix
        for col in range(0, 18):
            D = columns[col][0] - 1
            for row in range(0, 18):
                # For each element of TP Matrix
                for TP in range(D, columns[col][1]):
                    # Change ratio of stock on day D with respect to the
                    # price at TP, expressed in percent.
                    change_pct = (list_CP[TP] - list_CP[D]) / list_CP[D] * 100
                    if rows[row][0] <= change_pct < rows[row][1]:
                        matrix[row][col] = True
                        break
    return TP_matrixs
def get_data(self, sids, start, end):
    """Load Yahoo bars for ``sids`` between ``start`` and ``end``.

    Calls ``factory.load_bars_from_yahoo`` with an empty ``indexes`` mapping,
    records the ``items`` of the loaded object on ``self.sids`` and returns
    the loaded object.
    """
    bars = factory.load_bars_from_yahoo(
        stocks=sids,
        indexes={},
        start=start,
        end=end,
    )
    self.sids = bars.items
    return bars
import talib
import numpy as np

# Set the start and end dates
start = datetime(2000, 1, 1, 0, 0, 0, 0, pytz.utc)
end = datetime(2016, 4, 30, 0, 0, 0, 0, pytz.utc)
index = {'SPX': '^GSPC'}
stock = None

time_period = 30   # Technical indicators time period
pred_window = 1    # prediction window in days
train_ratio = 0.7  # training records ratio
fold_number = 5    # number of folds

# Load data from Yahoo Finance
data = load_bars_from_yahoo(indexes=index, stocks=stock, start=start, end=end)

# Name of the (single) index that was loaded. ``next(iter(...))`` works on
# Python 2 and 3 alike, unlike ``index.keys()[0]``.
sname = next(iter(index))
print(data[sname].describe())

# Exploratory analysis
get_ipython().magic(u'matplotlib inline')
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('ggplot')

vol = data[sname][['volume']]
bars = data[sname][['open', 'high', 'low', 'close', 'price']]
ax = bars.plot(figsize=(8, 6),
               title='(a) 2001-2016 Historical Price of S&P 500 Index')
from zipline.algorithm import TradingAlgorithm
from zipline.utils.factory import load_bars_from_yahoo
#from zipline import run_algorithm
import matplotlib.pyplot as plt
###------------------------------------------------------------------Packages Imported--------------------------

# Ticker to backtest. Other tickers previously tried here:
# 'SPY', 'BHANDHOS.BO', 'AAPL', 'MSFT', 'WIPRO.BO'.
assign_stock = 'XLB'

start = datetime(2014, 1, 1, 0, 0, 0, 0, pytz.utc).date()
end = datetime(2016, 1, 1, 0, 0, 0, 0, pytz.utc).date()
data = load_bars_from_yahoo(stocks=[assign_stock], start=start, end=end)


def initialize(context):
    """Set up the algorithm context: the traded asset and the lookback size.

    SPDR sector funds that were considered as a universe:
    XLY (Consumer Discretionary), XLF (Financial), XLK (Technology),
    XLE (Energy), XLV (Health Care), XLI (Industrial),
    XLP (Consumer Staples), XLB (Materials), XLU (Utilities).
    """
    # Trade the same ticker whose bars were loaded above.
    context.stocks = symbol(assign_stock)
    context.historical_bars = 100
import talib
import numpy as np

# Set the start and end dates
start = datetime(2000, 1, 1, 0, 0, 0, 0, pytz.utc)
end = datetime(2016, 4, 30, 0, 0, 0, 0, pytz.utc)
index = {'SPX': '^GSPC'}
stock = None

time_period = 30   # Technical indicators time period
pred_window = 1    # prediction window in days
train_ratio = 0.7  # training records ratio
fold_number = 5    # number of folds

# Load data from Yahoo Finance
data = load_bars_from_yahoo(indexes=index, stocks=stock, start=start, end=end)

# Name of the (single) index that was loaded. ``next(iter(...))`` works on
# Python 2 and 3 alike, unlike ``index.keys()[0]``.
sname = next(iter(index))
print(data[sname].describe())

# Exploratory analysis
get_ipython().magic(u'matplotlib inline')
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('ggplot')

vol = data[sname][['volume']]
bars = data[sname][['open', 'high', 'low', 'close', 'price']]
ax = bars.plot(figsize=(8, 6),
               title='(a) 2001-2016 Historical Price of S&P 500 Index')
print str(date)[0:10],'LONG:',sym elif self.portfolio.positions[sym].amount != 0: if sym_price >= self.atr_plot[sym]['profit'][-1] or sym_price <= self.atr_plot[sym]['loss'][-1]: q = self.portfolio.positions[sym].amount self.order(sym,-q) print str(date)[0:10],'Exit:',sym else: self.zscores[sym].append(0) self.zscores_s[sym].append(0) atr = self.get_atr(sym)[-1] self.atr_plot[sym]['profit'].append((atr*3)+sym_price) self.atr_plot[sym]['loss'].append(-(atr*3)+sym_price) self.day_count += 1 if __name__ == '__main__': data = load_bars_from_yahoo(stocks=sym_list, indexes={}, start=start, end=end) trend_trader = trend_trader() results = trend_trader.run(data) ########################################################################### # Generate metrics print 'Generating Risk Report...........' print 'Using S&P500 as benchmark........' start = results.first_valid_index().replace(tzinfo=pytz.utc) end = results.last_valid_index().replace(tzinfo=pytz.utc) env = trading.SimulationParameters(start, end) returns_risk = create_returns_from_list(results.returns, env) algo_returns = RiskMetricsBase(start, end, returns_risk).algorithm_period_returns benchmark_returns = RiskMetricsBase(start, end, returns_risk).benchmark_period_returns
# Show the plot. plt.gcf().set_size_inches(18, 8) plt.show() # Note: this if-block should be removed if running # this algorithm on quantopian.com if __name__ == '__main__': from datetime import datetime import pytz from zipline.algorithm import TradingAlgorithm from zipline.utils.factory import load_bars_from_yahoo asset = "VRX" print "--->>> Some more changes" # Set the simulation start and end dates start = datetime(2015, 1, 1, 0, 0, 0, 0, pytz.utc) end = datetime(2015, 11, 1, 0, 0, 0, 0, pytz.utc) # Load price data from yahoo. data = load_bars_from_yahoo(stocks=[asset], indexes={}, start=start, end=end, adjusted=False) # Create and run the algorithm. algo = TradingAlgorithm(initialize=initialize, handle_data=handle_data, identifiers=[asset]) results = algo.run(data) #analyze(results=results, asset=asset)
inverse_period = 1.0 / ( dateDelta.days / ( period / 12 ) ) else: cagr_period = "DAILY" inverse_period = 1.0 / ( dateDelta.days ) performance = ( self.portfolio.portfolio_value / self.cashStart ) cagr = pow( performance, inverse_period ) - 1.0 print( '%s: %s CAGR %s%%, PNL $%s, CASH $%s, PORTFOLIO $%s' % ( dateStr, cagr_period, ( cagr * 100 ), self.portfolio.pnl, self.portfolio.cash, self.portfolio.portfolio_value ) ) return cagr # # END OF CLASS GMRE # Remove previous Yahoo download content to assure clean backtest # cmd = removeCommand + ' ' + ziplineDataPath # returnCode = call(cmd, shell=True) # if returnCode != 0: # print("Couldn't %s :: %s" % (cmd, returnCode)) # sys.exit() start = datetime( *startDateTime ) end = datetime( *endDateTime ) data = load_bars_from_yahoo( stocks = basket, indexes = {}, start = start, end = end, adjusted = priceAdjusted ) gmre = GMRE() perf = gmre.run( data ) # Get the CAGR gmre.cagr()
def get_data(self, start, end=None):
    """Load Yahoo bars for this collection of symbols.

    Args:
        start: anything ``pd.Timestamp`` accepts; treated as UTC when it
            carries no timezone.
        end: same as ``start``; defaults to the current UTC time.

    Returns:
        The result of ``load_bars_from_yahoo`` called with ``self`` as the
        ``stocks`` argument and UTC-aware start and end timestamps.
    """
    def as_utc(moment):
        # pd.Timestamp(aware_value, tz=...) is rejected by newer pandas,
        # and the default ``end`` below is already tz-aware. Localize naive
        # values and convert aware ones instead.
        stamp = pd.Timestamp(moment)
        if stamp.tzinfo is None:
            return stamp.tz_localize('utc')
        return stamp.tz_convert('utc')

    if end is None:
        end = pd.Timestamp.utcnow()
    return load_bars_from_yahoo(stocks=self,
                                start=as_utc(start),
                                end=as_utc(end))
plt.show() # Note: this if-block should be removed if running # this algorithm on quantopian.com if __name__ == '__main__': from datetime import datetime import pytz from zipline.algorithm import TradingAlgorithm from zipline.utils.factory import load_bars_from_yahoo # Set the simulation start and end dates. start = datetime(2011, 1, 1, 0, 0, 0, 0, pytz.utc) end = datetime(2013, 1, 1, 0, 0, 0, 0, pytz.utc) # Load price data from yahoo. data = load_bars_from_yahoo( stocks=['AAPL'], indexes={}, start=start, end=end, ) # Create and run the algorithm. algo = TradingAlgorithm(initialize=initialize, handle_data=handle_data) results = algo.run(data) # Plot the portfolio and asset data. analyze(results=results)
>>> print proj array([ 0.33333333, 0.23333333, 0. , 0.43333333]) >>> print proj.sum() 1.0 Original matlab implementation: John Duchi ([email protected]) Python-port: Copyright 2013 by Thomas Wiecki ([email protected]). """ v = np.asarray(v) p = len(v) # Sort v into u in descending order v = (v > 0) * v u = np.sort(v)[::-1] sv = np.cumsum(u) rho = np.where(u > (sv - b) / np.arange(1, p+1))[0][-1] theta = np.max([0, (sv[rho] - b) / (rho+1)]) w = (v - theta) w[w < 0] = 0 return w if __name__ == '__main__': import pylab as pl data = load_bars_from_yahoo(stocks=STOCKS, indexes={}) olmar = OLMAR() results = olmar.run(data) results.portfolio_value.plot() pl.show()
inverse_period = 1.0 / ( dateDelta.days / ( period / 12 ) ) else: cagr_period = "Daily" inverse_period = 1.0 / ( dateDelta.days ) performance = ( self.portfolio.portfolio_value / self.cashStart ) cagr = pow( performance, inverse_period ) - 1.0 print( '%s: %s CAGR %s%%, PNL $%s, CASH $%s, PORTFOLIO $%s' % ( dateStr, cagr_period, ( cagr * 100 ), self.portfolio.pnl, self.portfolio.cash, self.portfolio.portfolio_value ) ) return cagr # # END OF CLASS GMRE # Remove previous Yahoo download content to assure clean backtest # cmd = removeCommand + ' ' + ziplineDataPath # returnCode = call(cmd, shell=True) # if returnCode != 0: # print("Couldn't %s :: %s" % (cmd, returnCode)) # sys.exit() start = datetime( *startDateTime ) end = datetime( *endDateTime ) data = load_bars_from_yahoo( stocks = basket, indexes = {}, start = start, end = end, adjusted = priceAdjusted ) gmre = GMRE() perf = gmre.run( data ) # Get the CAGR gmre.cagr()
def fetch(tickers, tBeg, tEnd, adjusted=False):
    """Load Yahoo bars for ``tickers`` between ``tBeg`` and ``tEnd``.

    No index data is requested (``indexes`` is an empty mapping);
    ``adjusted`` is forwarded to the loader unchanged.
    """
    bars = load_bars_from_yahoo(
        stocks=tickers,
        indexes={},
        start=tBeg,
        end=tEnd,
        adjusted=adjusted,
    )
    return bars
# Instantiate log mylogger = MyLogger() # Log directory directory_log = str(datetime.now())[0:19].replace(':', '-') # Create log directory os.makedirs('log/' + stock_name + '/' + directory_log) # Add file handle to mylogger mylogger.addFileHandler(directory_log) # Load data start_date = datetime(2014, 1, 1, 0, 0, 0, 0, pytz.utc) end_date = datetime(2016, 1, 1, 0, 0, 0, 0, pytz.utc) data = load_bars_from_yahoo(stocks=[stock_name], start=start_date, end=end_date) # Create algorithm object passing in initialize and # handle_data functions capital_base = 100000 algo_obj = TradingAlgorithm(initialize=initialize, handle_data=handle_data, analyze=analyze, data_frequency='daily', capital_base=capital_base) # Run algorithm perf = algo_obj.run(data)
# 3: ('HAS', '2/20/2013'), # 4: ('INTC', '3/12/2012'), # 5: ('GMCR', '2/20/2013'), # 6: ('CRM', '12/12/2005'), # 7: ('VRTX', '8/26/2013'), # 8: ('DPM', '12/12/2012'), # 9:('YHOO', '8/14/2002'), # 10:('GOOGL', '6/24/2005'), # 11:('FCX', '3/4/2004'), } purchases = pd.DataFrame( {i: (X[i][0], pd.to_datetime(pd.Timestamp(X[i][1], tz='utc'))) for i in X}).T purchases.columns = columns = ['symbol', 'Date'] data = load_bars_from_yahoo(stocks=purchases.symbol) prices = data.minor_xs('price') prices def thing(days_back, days_ahead): data = {} delta = datetime.timedelta for i in purchases.index: ticker, dt = purchases.ix[i] before = dt - delta(days=days_back) after = dt + delta(days=days_ahead) data[i] = prices[ticker].truncate(before=before, after=after).dropna() return data
if __name__ == '__main__': from datetime import datetime import pytz from zipline.algorithm import TradingAlgorithm from zipline.utils.factory import load_bars_from_yahoo import pandas as pd import matplotlib.pyplot as plt # Create and run the algorithm. algo = TradingAlgorithm(initialize=initialize, handle_data=handle_data) start = datetime(2010, 5, 1, 0, 0, 0, 0, pytz.utc) end = datetime(2016, 5, 31, 0, 0, 0, 0, pytz.utc) print('Getting Yahoo data for 30 DJIA stocks and DIA ETF as benchmark...') data = load_bars_from_yahoo(stocks=tickers, start=start, end=end) # Check price data data.loc[:, :, 'price'].plot(figsize=(8,7), title='Input Price Data') plt.ylabel('price in $'); plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5)) plt.show() # Run algorithm results = algo.run(data) # Fix possible issue with timezone results.index = results.index.normalize() if results.index.tzinfo is None: results.index = results.index.tz_localize('UTC') # Adjust benchmark returns for delayed trading due to 3 year min data window
portfolios = [solvers.qp(mu * S, -pbar, G, h, A, b)['x'] for mu in mus] ## CALCULATE RISKS AND RETURNS FOR FRONTIER returns = [blas.dot(pbar, x) for x in portfolios] risks = [np.sqrt(blas.dot(x, S * x)) for x in portfolios] ## CALCULATE THE 2ND DEGREE POLYNOMIAL OF THE FRONTIER CURVE m1 = np.polyfit(returns, risks, 2) x1 = np.sqrt(m1[2] / m1[0]) # CALCULATE THE OPTIMAL PORTFOLIO wt = solvers.qp(opt.matrix(x1 * S), -pbar, G, h, A, b)['x'] return np.asarray(wt), returns, risks weights, returns, risks = optimal_portfolio(return_vec) plt.plot(stds, means, 'o') plt.ylabel('mean') plt.xlabel('std') plt.plot(risks, returns, 'y-o') from zipline.utils.factory import load_bars_from_yahoo end = datetime.date.today() start = datetime.date(2015, 1, 1) data = load_bars_from_yahoo( stocks=['IBM', 'GLD', 'XOM', 'AAPL', 'MSFT', 'TLT', 'SHY'], start=start, end=end) data.loc[:, :, 'price'].plot(figsize=(8, 5)) plt.ylabel('price in $')
>>> print proj.sum() 1.0 Original matlab implementation: John Duchi ([email protected]) Python-port: Copyright 2013 by Thomas Wiecki ([email protected]). """ v = np.asarray(v) p = len(v) # Sort v into u in descending order v = (v > 0) * v u = np.sort(v)[::-1] sv = np.cumsum(u) rho = np.where(u > (sv - b) / np.arange(1, p + 1))[0][-1] theta = np.max([0, (sv[rho] - b) / (rho + 1)]) w = v - theta w[w < 0] = 0 return w if __name__ == "__main__": import pylab as pl data = load_bars_from_yahoo(stocks=STOCKS, indexes={}) olmar = OLMAR() results = olmar.run(data) results.portfolio_value.plot() pl.show()
TickerList.tickers = ['AMD', 'CERN', 'COST', 'DELL', 'GPS', 'INTC', 'MMM'] def genWeights(size): while True: #create weights so that sum weights = 1 randomWeights = np.array([[random.choice([0.0, 1.0])] for i in xrange(size)]) #scaledweights = randomWeights * (1 / np.sum(randomWeights)) yield randomWeights if __name__ == '__main__': csvFileName = raw_input("CSV File Name: ") start = datetime(2010, 1, 1, 0, 0, 0, 0, pytz.utc) end = datetime(2014, 1, 1, 0, 0, 0, 0, pytz.utc) data = load_bars_from_yahoo(stocks=TickerList.tickers, indexes={}, start=start, end=end) #use last known price for missing data data = data.fillna(method='ffill') #fill data at front of series data = data.fillna(method='bfill') #drop any other data data = data.dropna(how='any') #now shape of data is finalised num_tickers = data.shape[0] print 'Number of stocks', num_tickers #init csv file
def get_data(ENVIRONMENT, tickers, start, end, benchmark, risk_free, cash_proxy):
    """Load bar data for ``tickers`` plus the benchmark, cash-proxy and risk-free symbols.

    The data is fetched with ``load_bars_from_yahoo`` when ``ENVIRONMENT`` is
    ``'ZIPLINE'`` and with ``get_pricing`` when it is ``'RESEARCH'``.  It is then
    forward-filled per security and trimmed along its major axis to the window
    in which every security, the benchmark and the cash proxy all have data.
    The inception/end date tables and the resulting panel are printed.

    Args:
        ENVIRONMENT: ``'ZIPLINE'`` or ``'RESEARCH'``.
        tickers: list of symbols (it is concatenated with a list using ``+``).
        start: start of the requested window, passed through to the loader.
        end: end of the requested window, passed through to the loader.
        benchmark: benchmark symbol; resolved with ``symbols()`` in RESEARCH.
        risk_free: risk-free symbol; resolved with ``symbols()`` in RESEARCH.
        cash_proxy: cash-proxy symbol; resolved with ``symbols()`` in RESEARCH.

    Returns:
        The trimmed panel, with its items relabelled to
        ``['open_price', 'high', 'low', 'close_price', 'volume', 'price']``.

    Raises:
        ValueError: if ``ENVIRONMENT`` is not one of the two supported values.
    """
    # Fail fast: without this check an unknown environment skips every branch
    # below and only surfaces later as an UnboundLocalError on `data`.
    if ENVIRONMENT not in ('ZIPLINE', 'RESEARCH'):
        raise ValueError(
            "ENVIRONMENT must be 'ZIPLINE' or 'RESEARCH', got {!r}".format(ENVIRONMENT))

    if ENVIRONMENT == 'ZIPLINE':
        benchmark_symbol = benchmark
        cash_proxy_symbol = cash_proxy
        risk_free_symbol = risk_free
    elif ENVIRONMENT == 'RESEARCH':
        benchmark_symbol = symbols(benchmark)
        cash_proxy_symbol = symbols(cash_proxy)
        risk_free_symbol = symbols(risk_free)

    # data is a Panel of DataFrames, one for each security
    if ENVIRONMENT == 'ZIPLINE':
        data = load_bars_from_yahoo(
            stocks = list(set(tickers + [benchmark_symbol, cash_proxy_symbol, risk_free_symbol])),
            start = start,
            end = end,
            adjusted=False).transpose(2,1,0)
        data.price = data.close  # use this for comparing to Quantopian 'get_pricing'
    elif ENVIRONMENT == 'RESEARCH':
        data = get_pricing(
            set(tickers + [benchmark_symbol, cash_proxy_symbol, risk_free_symbol]),
            start_date=start,
            end_date = end,
            frequency='daily'
        )

    # repair unusable data
    # BE CAREFUL!! dropna doesn't change the Panel's Major Index, so NA may still remain!
    # safer to use ffill
    # NOTE(review): this writes into a freshly transposed panel on every pass;
    # whether the fill reaches `data` depends on transpose returning a view -- confirm.
    for security in data.transpose(2,1,0):
        data.transpose(2,1,0)[security] = data.transpose(2,1,0)[security].ffill()

    if ENVIRONMENT == 'ZIPLINE':
        other_data = load_bars_from_yahoo(
            stocks=[benchmark_symbol] + [cash_proxy_symbol],
            start=start,
            end=end,
            adjusted=False)  # use this for comparing to Quantopian 'get_pricing'
        # NOTE(review): the assignment targets a temporary transposed panel, so it
        # probably leaves `other_data` itself untouched -- confirm before relying on it.
        other_data.transpose(2,1,0).price = other_data.transpose(2,1,0).close  # use this for comparing to Quantopian 'get_pricing'
    elif ENVIRONMENT == 'RESEARCH':
        other_data = get_pricing(
            [benchmark_symbol] + [cash_proxy_symbol],
            fields='price',
            start_date= data.major_axis[0],
            end_date = data.major_axis[-1],
            frequency='daily',
        )

    other_data = other_data.ffill()

    # need to add benchmark (eg SPY) and cash proxy to data panel
    # (from here on `benchmark` is the loaded data, no longer the symbol argument)
    benchmark = other_data[benchmark_symbol]
    benchmark_rets = benchmark.pct_change().dropna()

    benchmark2 = other_data[cash_proxy_symbol]
    benchmark2_rets = benchmark2.pct_change().dropna()

    # make sure we have all the data we need
    inception_dates = pd.DataFrame(
        [data.transpose(2,1,0)[security].dropna().index[0].date()
         for security in data.transpose(2,1,0)],
        index=data.transpose(2,1,0).items, columns=['inception'])
    inception_dates.loc['benchmark'] = benchmark.index[0].date()
    inception_dates.loc['benchmark2'] = benchmark2.index[0].date()
    print(inception_dates)
    print()

    # check that the end dates coincide
    end_dates = pd.DataFrame(
        [data.transpose(2,1,0)[security].dropna().index[-1].date()
         for security in data.transpose(2,1,0)],
        index=data.transpose(2,1,0).items, columns=['end_date'])
    end_dates.loc['benchmark'] = benchmark.index[-1].date()
    end_dates.loc['benchmark2'] = benchmark2.index[-1].date()
    print(end_dates)

    # this will ensure that the start and end dates are aligned:
    # keep only the span from the latest inception to the earliest end date
    data = data[:,inception_dates.values.max(): end_dates.values.min(),:]
    # the two return series are trimmed to the same window but are not returned
    benchmark_rets = benchmark_rets[inception_dates.values.max(): end_dates.values.min()]
    benchmark2_rets = benchmark2_rets[inception_dates.values.max(): end_dates.values.min()]

    print ('\n\nBACKTEST DATA IS FROM {} UNTIL {} \n*************************************************'
           .format(inception_dates.values.max(), end_dates.values.min()))

    # DATA FROM ZIPLINE LOAD_YAHOO_BARS DIFFERS FROM RESEARCH ENVIRONMENT!
    data.items = ['open_price', 'high', 'low', 'close_price', 'volume', 'price']
    print ('\n\n{}'.format(data))

    return data
self.order(sym, -q) self.sell_plot[sym].append(self.day_count) elif self.portfolio.positions[sym].amount < 0: if sym_price <= self.stops[sym][ 0] or sym_price >= self.stops[sym][1]: print day, ' Exit Short ', sym q = self.portfolio.positions[sym].amount self.order(sym, -q) self.buy_plot[sym].append(self.day_count) self.day_count += 1 if __name__ == '__main__': start = datetime(2012, 1, 1, 0, 0, 0, 0, pytz.utc) end = datetime(2013, 01, 01, 0, 0, 0, 0, pytz.utc) data = load_bars_from_yahoo(stocks=feed, indexes={}, start=start, end=end) sector_rs = sector_rs() results = sector_rs.run(data) ########################################################################### # Generate metrics print 'Generating Risk Report...........' print 'Using S&P500 as benchmark........' start = results.first_valid_index().replace(tzinfo=pytz.utc) end = results.last_valid_index().replace(tzinfo=pytz.utc) env = trading.SimulationParameters(start, end) returns_risk = create_returns_from_list(results.returns, env) algo_returns = RiskMetricsBase(start, end, returns_risk).algorithm_period_returns
print day,' Exit Long ',sym q = self.portfolio.positions[sym].amount self.order(sym,-q) self.sell_plot[sym].append(self.day_count) elif self.portfolio.positions[sym].amount < 0: if sym_price <= self.stops[sym][0] or sym_price >= self.stops[sym][1]: print day,' Exit Short ',sym q = self.portfolio.positions[sym].amount self.order(sym,-q) self.buy_plot[sym].append(self.day_count) self.day_count += 1 if __name__ == '__main__': start = datetime(2012, 1, 1, 0, 0, 0, 0, pytz.utc) end = datetime(2013, 01, 01, 0, 0, 0, 0, pytz.utc) data = load_bars_from_yahoo(stocks=feed, indexes={}, start=start, end=end) sector_rs = sector_rs() results = sector_rs.run(data) ########################################################################### # Generate metrics print 'Generating Risk Report...........' print 'Using S&P500 as benchmark........' start = results.first_valid_index().replace(tzinfo=pytz.utc) end = results.last_valid_index().replace(tzinfo=pytz.utc) env = trading.SimulationParameters(start, end) returns_risk = create_returns_from_list(results.returns, env) algo_returns = RiskMetricsBase(start, end, returns_risk).algorithm_period_returns benchmark_returns = RiskMetricsBase(start, end, returns_risk).benchmark_period_returns
def analyze(context, perf):
    """Draw the backtest results as two stacked panels and show the figure.

    The upper panel plots ``perf.portfolio_value``.  The lower panel overlays
    the ``perf[eqSymbol]`` column and the ``mc_price`` column.  ``eqSymbol``
    is read from module scope; ``context`` is accepted but not used.
    """
    figure = plt.figure()

    # Upper panel: portfolio value.
    value_axes = figure.add_subplot(2, 1, 1)
    perf.portfolio_value.plot(ax=value_axes)
    value_axes.set_ylabel("portfolio value in $")

    # Lower panel: the equity column first, then the "mc_price" frame.
    price_axes = figure.add_subplot(2, 1, 2)
    for selection in (eqSymbol, ["mc_price"]):
        perf[selection].plot(ax=price_axes)
    price_axes.set_ylabel("price in $")

    plt.legend(loc=0)
    plt.show()


if __name__ == "__main__":
    # Fetch bars for one equity from Yahoo! finance over a fixed UTC window.
    eqSymbol = "YHOO"
    start = datetime(2010, 1, 1, tzinfo=pytz.utc)
    end = datetime(2014, 1, 1, tzinfo=pytz.utc)
    data = load_bars_from_yahoo(stocks=[eqSymbol], start=start, end=end)

    # Instantiate the algorithm, then run it over the downloaded bars.
    algo_obj = MonteCarloTradingAlgorithm()
    perf_manual = algo_obj.run(data)
    # print(perf_manual)
    # print(perf_manual.ending_value[-1])
["2009-06-08", sid(7041), 1], ["2009-06-08", sid(1900), 1], ["2012-09-24", sid(22802), 0], ["2012-09-24", sid(7792), 1], ["2013-09-23", sid(700), 0], ["2013-09-23", sid(3735), 0], ["2013-09-23", sid(20088), 1], ["2013-09-23", sid(35920), 1], ["2015-03-18", sid(6653), 0], ["2015-03-18", sid(24), 1] ] # Load data manually from Yahoo! finance start = datetime(2011, 1, 1, 0, 0, 0, 0, pytz.utc) end = datetime(2012, 1, 1, 0, 0, 0, 0, pytz.utc) data = load_bars_from_yahoo(stocks=['SPY'], start=start, end=end) def get_dow(dow_constituents): date = datetime.date() ret = [] for equity in dow_constituents: if equity[0] > date: break if equity[2] == 1: ret.append(equity[1]) elif equity[2] == 0: ret.remove(equity[1]) else: raise Exception('unknown membership') return ret
print weights


# ### Backtesting on real market data
# This is all very interesting but not very applied. We next demonstrate how you can create a simple algorithm in [`zipline`](http://github.com/quantopian/zipline) -- the open-source backtester that powers [Quantopian](https://www.quantopian.com) -- to test this optimization on actual historical stock data.
#
# First, let's load in some historical data using [Quantopian](https://www.quantopian.com)'s data (if we are running in the [Quantopian Research Platform](http://blog.quantopian.com/quantopian-research-your-backtesting-data-meets-ipython-notebook/)), or the `load_bars_from_yahoo()` function from `zipline`.

# In[5]:

from zipline.utils.factory import load_bars_from_yahoo

# The window ends now (UTC) and starts 2500 business days earlier.
end = pd.Timestamp.utcnow()
start = end - 2500 * pd.tseries.offsets.BDay()

data = load_bars_from_yahoo(stocks=['IBM', 'GLD', 'XOM', 'AAPL', 'MSFT', 'TLT', 'SHY'],
                            start=start, end=end)


# In[9]:

# Plot the 'price' field for every loaded symbol.
data.loc[:, :, 'price'].iplot(filename='prices', yTitle='price in $', world_readable=True, asDates=True)


# Next, we'll create a `zipline` algorithm by defining two functions -- `initialize()` which is called once before the simulation starts, and `handle_data()` which is called for every trading bar. We then instantiate the algorithm object.
#
# If you are confused about the syntax of `zipline`, check out the [tutorial](http://nbviewer.ipython.org/github/quantopian/zipline/blob/master/docs/notebooks/tutorial.ipynb).

# In[14]:

import zipline