def read_data(self, dataprovider):
    """Load the trade book at ``self.filepath`` and the market data it needs.

    Populates ``self.tradedates``, ``self.ldt_timestamps``, ``self.d_data``,
    ``self.order_table`` and ``self.portfolio``.  Any exception raised along
    the way is printed and swallowed (best-effort behaviour is kept).

    @param dataprovider: data source name handed to ``da.DataAccess``.
    """
    c_dataobj = da.DataAccess(dataprovider)
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    dt_timeofday = dt.timedelta(hours=16)
    try:
        with open(self.filepath, 'rU') as csv_con:
            readerlist = list(
                csv.reader(csv_con, delimiter=",", quoting=csv.QUOTE_NONE))

        # Convert strings to python datetimes, then sort chronologically.
        # sorted() returns a new list, so its result has to be kept: the
        # first and last rows are used below as the trading period bounds.
        reader_listDT = sorted(insertDT(readerlist), key=lambda row: row[0])

        # Collect the trade dates and the set of tickers in the trade book.
        portfolioset = set()
        for row in reader_listDT:
            portfolioset.add(row[3])
            self.tradedates.append(row[0])

        # Timestamps for the given trade period.
        self.ldt_timestamps = du.getNYSEdays(
            reader_listDT[0][0], reader_listDT[-1][0], dt_timeofday)
        ldf_data = c_dataobj.get_data(
            self.ldt_timestamps, list(portfolioset), ls_keys)
        # One entry per key in ls_keys.
        self.d_data = dict(zip(ls_keys, ldf_data))

        # The order table is needed to run the simulation.
        self.order_table = copy.deepcopy(reader_listDT)

        # Every traded symbol starts with a zero allocation.
        self.portfolio = dict.fromkeys(portfolioset, 0)
    except Exception as e:
        print(e)
def main(argv):
    """Print Bollinger values for a fixed basket of symbols over 2010.

    Loads every price key for AAPL, GOOG, IBM and MSFT from the 'Yahoo'
    source, fills gaps (forward, backward, then 1.0) and prints the result
    of ``get_bollinger_values`` with a lookback of 20.  ``argv`` is not used.
    """
    trading_days = du.getNYSEdays(
        dt.datetime(2010, 1, 1),
        dt.datetime(2010, 12, 31),
        dt.timedelta(hours=16),
    )
    source = da.DataAccess('Yahoo')
    tickers = ['AAPL', 'GOOG', 'IBM', 'MSFT']
    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']

    frames = dict(zip(fields, source.get_data(trading_days, tickers, fields)))
    for field in fields:
        frames[field] = (frames[field]
                         .fillna(method='ffill')
                         .fillna(method='bfill')
                         .fillna(1.0))

    window = 20
    print(get_bollinger_values(tickers, frames, window))
def init_data(dt_start, dt_end, ls_symbols):
    """Read gap-filled 'close' data for ``ls_symbols`` between two dates.

    Returns a dictionary with the single key 'close' holding the data read
    from the 'Yahoo' source, with NaN filled forward, then backward, then
    replaced by 1.0.
    """
    # Closing prices are wanted, hence the 16:00 time of day.
    trading_days = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))
    source = da.DataAccess('Yahoo')

    fields = ['close']
    frames = dict(zip(fields, source.get_data(trading_days, ls_symbols, fields)))

    for field in fields:
        frames[field] = (frames[field]
                         .fillna(method='ffill')
                         .fillna(method='bfill')
                         .fillna(1.0))
    return frames
def getMarketCloseDataFor(aListOfSymbols, daysOfMarketOpen):
    """Return the 'close' entry of the 'Yahoo' data for the given symbols.

    The keys come from ``createMarketKeys()``; the data returned by
    ``get_data`` is paired with those keys and only 'close' is handed back.
    """
    source = da.DataAccess('Yahoo')
    fetched = source.get_data(
        daysOfMarketOpen, aListOfSymbols, createMarketKeys())
    by_key = dict(zip(createMarketKeys(), fetched))
    return by_key['close']
def getYahooData(startdate, enddate, symbols):
    """
    @summary Returns the 'close' data read from the 'Yahoo' source, with
             NaN filled forward, then backward, then replaced by 1.0
    @param startdate: first date of the period
    @param enddate: last date of the period
    @param symbols: List of Symbols
    @return dictionary with the single key 'close'
    """
    # Closing prices are wanted, hence the 16:00 time of day.
    close_time = dt.timedelta(hours=16)
    trading_days = du.getNYSEdays(startdate, enddate, close_time)
    source = da.DataAccess('Yahoo')

    fields = ['close']
    yahoo_data = dict(zip(fields, source.get_data(trading_days, symbols, fields)))

    for field in fields:
        filled = yahoo_data[field].fillna(method='ffill')
        filled = filled.fillna(method='bfill')
        yahoo_data[field] = filled.fillna(1.0)
    return yahoo_data
def get_symbols_in_year(dataobj, year):
    """Return the symbols of the 'sp500<year>' list plus 'SPY'.

    Args:
        dataobj: object exposing ``get_symbols_from_list``.  When ``None`` a
            ``da.DataAccess('Yahoo')`` instance is created instead.
        year: year appended to 'sp500' to form the list name.

    Returns:
        A new list of symbols ending with 'SPY'.
    """
    # Honour the object supplied by the caller; only build one as a fallback.
    if dataobj is None:
        dataobj = da.DataAccess('Yahoo')
    ls_symbols = list(dataobj.get_symbols_from_list('sp500' + str(year)))
    ls_symbols.append('SPY')
    return ls_symbols
def get_symbols_from_list(list_name):
    """Retrieve a named list of ticker symbols through QSTK.

    With the QSTK toolkit installed, `get_symbols_from_list('sp5002012')`
    asks a 'Yahoo' DataAccess object for the list of that name.

    An ImportError (QSTK missing) is propagated to the caller.  Any other
    failure while importing or constructing the DataAccess object yields an
    empty list.  Errors raised by the lookup itself are propagated.

    Example:
    >> len(get_symbols_from_list('sp5002012')) in (0, 501)
    True
    """
    try:
        # QSTK knows lists such as the S&P500 members for 2012 ('sp5002012').
        import QSTK.qstkutil.DataAccess as da
        source = da.DataAccess('Yahoo')
    except ImportError:
        raise
    except:
        return []
    return source.get_symbols_from_list(list_name)
def simulate(date_start, date_end, symbols, allocations):
    """Compute summary statistics for a portfolio held over a date range.

    Args:
        date_start: first date of the period.
        date_end: last date of the period.
        symbols: symbols making up the portfolio.
        allocations: one weight per symbol, multiplied against the
            normalized close prices.

    Returns:
        Tuple ``(vol, daily_ret, sharpe, cum_ret)``: standard deviation of
        the daily portfolio returns, their average, the Sharpe ratio scaled
        by sqrt(252) with a zero risk-free rate, and the final portfolio
        value relative to its starting value.
    """
    date_timeofday = dt.timedelta(hours=16)
    ldt_timestamps = du.getNYSEdays(date_start, date_end, date_timeofday)
    c_dataobj = da.DataAccess('Yahoo')
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = c_dataobj.get_data(ldt_timestamps, symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    na_price = d_data['close'].values
    # Normalize the prices to start at 1 so relative returns are visible.
    na_normalized_price = na_price / na_price[0, :]

    # Daily portfolio value: weighted sum of the normalized prices.
    na_port_value = np.sum(na_normalized_price * allocations, axis=1)
    # The cumulative return is the last portfolio value; it must be read
    # before the series is turned into daily returns.
    cum_ret = na_port_value[-1]

    # returnize0 is called for its in-place effect, so work on a copy.
    na_portrets = na_port_value.copy()
    tsu.returnize0(na_portrets)

    rf_rate = 0
    vol = np.std(na_portrets)
    daily_ret = np.average(na_portrets)
    sharpe = np.sqrt(252) * ((daily_ret - rf_rate) / vol)
    return vol, daily_ret, sharpe, cum_ret
def simulate(dt_start, dt_end, ls_symbols, lf_port_alloc):
    """Return volatility, mean daily return, Sharpe ratio and cumulative
    return of a portfolio.

    Daily returns are computed per symbol from gap-filled close prices and
    then combined with the weights in ``lf_port_alloc``.  The Sharpe ratio
    is scaled by sqrt(252); the cumulative return is the last element of
    the running product of ``1 + daily portfolio return``.
    """
    # Trading-day timestamps at 16:00.
    close_time = dt.timedelta(hours=16)
    trading_days = du.getNYSEdays(dt_start, dt_end, close_time)

    # Read the closing prices into a dictionary keyed by field name.
    source = da.DataAccess('Yahoo')
    fields = ['close']
    frames = dict(zip(fields, source.get_data(trading_days, ls_symbols, fields)))

    # Fill NaN forward, then backward, then with 1.0.
    for field in fields:
        frames[field] = (frames[field]
                         .fillna(method='ffill')
                         .fillna(method='bfill')
                         .fillna(1.0))

    close = frames['close'].values
    # The division yields a fresh array, which returnize0 then overwrites
    # with daily returns.
    symbol_rets = close / close[0, :]
    tsu.returnize0(symbol_rets)
    portfolio_rets = np.sum(symbol_rets * lf_port_alloc, axis=1)

    mean_ret = np.mean(portfolio_rets)
    volatility = np.std(portfolio_rets)
    sharpe = np.sqrt(252) * mean_ret / volatility
    total = np.cumprod(portfolio_rets + 1)[-1]
    return volatility, mean_ret, sharpe, total
def run(dt_start, dt_end, s_list, find_function, event_amount):
    """Print the run parameters, load market data and call ``find_function``.

    Args:
        dt_start: first date of the period.
        dt_end: last date of the period.
        s_list: name of the symbol list to load; 'SPY' is appended to it.
        find_function: callable invoked as
            ``find_function(ls_symbols, d_data, event_amount)``.
        event_amount: value forwarded to ``find_function``.

    Returns:
        Whatever ``find_function`` returns.
    """
    # A callback without a docstring has __doc__ set to None, which cannot
    # be concatenated to a string; fall back to its name in that case.
    s_function = find_function.__doc__
    if s_function is None:
        s_function = getattr(find_function, '__name__', repr(find_function))

    print("-----------------------------------------------------------")
    print("Start date: " + str(dt_start))
    print("End date : " + str(dt_end))
    print("Symbols : " + s_list)
    print("Function : " + s_function)
    print("Event Amt : " + str(event_amount))

    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))
    dataobj = da.DataAccess('Yahoo')
    ls_symbols = dataobj.get_symbols_from_list(s_list)
    ls_symbols.append('SPY')

    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    # Fill NaN forward, then backward, then with 1.0.
    for s_key in ls_keys:
        d_data[s_key] = (d_data[s_key]
                         .fillna(method='ffill')
                         .fillna(method='bfill')
                         .fillna(1.0))

    df_orders = find_function(ls_symbols, d_data, event_amount)
    return df_orders
def simulate(dt_start, dt_end, ls_symbols, ls_allocation):
    """Compute volatility, mean daily return, Sharpe ratio and cumulative
    return of a portfolio.

    Args:
        dt_start: first date of the period.
        dt_end: last date of the period.
        ls_symbols: symbols making up the portfolio.
        ls_allocation: one weight per symbol (any number of symbols).

    Returns:
        Tuple ``(vol, daily_ret, sharpe, cum_ret)``.  The Sharpe ratio is
        scaled by sqrt(NUM_TRADING_DAYS); ``cum_ret`` is the last value of
        the normalized portfolio series.
    """
    # Trading-day timestamps at 16:00.
    dt_timeofday = dt.timedelta(hours=16)
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)

    # Read the closing prices.
    ls_keys = ['close']
    c_dataobj = da.DataAccess('Yahoo')
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    # Portfolio value: normalized prices weighted by the allocation.
    temp = d_data['close'].values.copy()
    d_normal = temp / temp[0, :]
    # -1 lets numpy size the column vector from the allocation itself
    # instead of assuming exactly four symbols.
    alloc = np.array(ls_allocation).reshape(-1, 1)
    portVal = np.dot(d_normal, alloc)

    # Daily returns; returnize0 is called for its in-place effect.
    dailyVal = portVal.copy()
    tsu.returnize0(dailyVal)

    daily_ret = np.mean(dailyVal)
    vol = np.std(dailyVal)
    sharpe = np.sqrt(NUM_TRADING_DAYS) * daily_ret / vol
    cum_ret = portVal[-1][0]
    return vol, daily_ret, sharpe, cum_ret
def read_data(start_date, end_date, ls_symbols):
    '''Read gap-filled 'close' data for the specified symbols.

    Returns a DataFrame built from the close values, with ``ls_symbols`` as
    columns and the index of the data that was read.
    '''
    # Closing prices are wanted, hence the 16:00 time of day.
    trading_days = du.getNYSEdays(start_date, end_date, dt.timedelta(hours=16))

    print("Reading data")
    source = da.DataAccess('Yahoo', cachestalltime=0)
    fields = ['close']
    fetched = source.get_data(trading_days, ls_symbols, fields)
    print("Done")

    frames = dict(zip(fields, fetched))
    # Fill NaN forward, then backward, then with 1.0.
    for field in fields:
        frames[field] = (frames[field]
                         .fillna(method='ffill')
                         .fillna(method='bfill')
                         .fillna(1.0))

    close = frames['close']
    return pd.DataFrame(close.values, columns=ls_symbols, index=close.index)
def __init__(self, filename, dataprovider, startdt, enddt, initial_cash):
    """Store the run parameters and preload the 'SPY' reference data.

    @param filename: path of the trade book, kept in ``self.filepath``.
    @param dataprovider: data source name handed to ``da.DataAccess``.
    @param startdt: first date of the period.
    @param enddt: last date of the period.
    @param initial_cash: starting value of ``self.cumulativeportval``.
    """
    self.dt_start = startdt
    self.dt_end = enddt
    self.filepath = filename
    self.cumulativeportval = initial_cash
    self.order_table = []
    # Must be an empty dict instance, not the ``dict`` type itself.
    self.portfolio = {}
    self.tradedates = []
    self.d_data = []
    self.daily_portfolio_val = []

    self.ldt_timestamps = du.getNYSEdays(
        self.dt_start, self.dt_end, dt.timedelta(hours=16))
    self.dataobj = da.DataAccess(dataprovider)
    self.ls_symbols = self.dataobj.get_symbols_from_list('sp5002012')

    # NOTE(review): ls_keys is not defined in this method; it is presumably
    # a module-level list of data keys -- confirm.
    spy_ldf_data = self.dataobj.get_data(
        self.ldt_timestamps, ['SPY'], ls_keys)
    self.spy_d_data = dict(zip(ls_keys, spy_ldf_data))
def get_market_data(dt_start, dt_end, ls_symbols):
    """Read gap-filled 'close' data for ``ls_symbols`` plus 'SPY'.

    Args:
        dt_start: first date of the period.
        dt_end: last date of the period; one day is added before the
            trading days are looked up.
        ls_symbols: symbols to read.  The caller's list is left untouched.

    Returns:
        Dictionary with the single key 'close'.
    """
    # Build a new list instead of using +=, which would extend the caller's
    # list in place and add another 'SPY' on every call.
    ls_symbols = list(ls_symbols) + ["SPY"]

    # A day is added to the end date since the last day otherwise does not
    # seem to be counted.
    ldt_timestamps = du.getNYSEdays(
        dt_start, dt_end + dt.timedelta(days=1), DT_TIME_OF_DAY)

    c_dataobj = da.DataAccess('Yahoo')
    ls_keys = ['close']
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    # Fill NaN forward, then backward, then with 1.0.
    for s_key in ls_keys:
        d_data[s_key] = (d_data[s_key]
                         .fillna(method='ffill')
                         .fillna(method='bfill')
                         .fillna(1.0))
    return d_data
def compare_to_SPX(ldt_timestamps, values, output_pdf='hw3.pdf'):
    """Print fund statistics next to '$SPX' and save a comparison plot.

    Args:
        ldt_timestamps: timestamps for which '$SPX' close data is read.
        values: fund values, one per timestamp.
        output_pdf: path of the PDF the plot is saved to.
    """
    c_dataobj = da.DataAccess('Yahoo')
    ldf_data = c_dataobj.get_data(ldt_timestamps, ['$SPX'], ['close'])
    spx_values = ldf_data[0].values

    # Put the fund values in a column next to the $SPX column.
    values = np.expand_dims(values, axis=1)
    prices = np.hstack((values, spx_values))
    na_normalized_prices = prices / prices[0, :]

    def stock_stat(prices):
        """Return (std, mean, sharpe, total return) per column."""
        # Work on a copy: returnize0 is used elsewhere in this file for its
        # in-place effect, and the normalized prices are plotted below.
        na_rets = tsu.returnize0(prices.copy())
        N = 252
        std = np.std(na_rets, axis=0)
        mean = np.mean(na_rets, axis=0)
        sr = np.sqrt(N) * mean / std
        total = np.cumprod(na_rets + 1, axis=0)[-1, :]
        return (std, mean, sr, total)

    (std, mean, sr, total) = stock_stat(na_normalized_prices)
    print('Date Range: %s to %s' % (ldt_timestamps[0], ldt_timestamps[-1]))
    print('Sharpe Ratio of Fund and $SPX: %s' % (sr))
    print('Total Return of Fund and $SPX: %s' % (total))
    print('Standard Deviation of Fund and $SPX: %s' % (std))
    print('Average Daily Return of Fund and $SPX: %s' % (mean))

    plt.clf()
    N = na_normalized_prices.shape[0]
    plt.plot(np.arange(N), na_normalized_prices)
    plt.legend(['Fund', '$SPX'], loc='upper left')
    plt.ylabel('Normalized Prices')
    plt.xlabel('Time')
    plt.savefig(output_pdf, format='pdf')
def main(ls_symbols, dt_start, dt_end, i_lookback, s_key):
    """Write the gap-filled ``s_key`` data and its Bollinger band values to CSV.

    The data for ``ls_symbols`` is read from the 'Yahoo' source, each
    symbol's column is filled (forward, backward, then 1.0) and the result
    is written to 'df_close.csv'.  The values computed by
    ``calculate_df_bollinger_band_values`` are written to
    'df_bollinger_band_values.csv'.
    """
    trading_days = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))
    source = da.DataAccess('Yahoo')

    # TODO: Track index separately from composite stocks. Trigger could
    # activate off of index.
    fields = [s_key]
    frames = dict(zip(fields, source.get_data(trading_days, ls_symbols, fields)))

    filled = frames[s_key].copy()
    for ticker in ls_symbols:
        filled[ticker] = (filled[ticker]
                          .fillna(method='ffill')
                          .fillna(method='bfill')
                          .fillna(1.0))

    print("Writing df_close.csv.")
    filled.to_csv('df_close.csv')

    print("Calculating Bollinger band values.")
    bands = calculate_df_bollinger_band_values(
        ls_symbols=ls_symbols, df_close=filled, i_lookback=i_lookback)

    print("Writing df_bollinger_band_values.")
    bands.to_csv('df_bollinger_band_values.csv')
def create_orders_events(ldt_timestamps, symbols_list, event_amount):
    """Find events and write a Buy/Sell order pair for each one.

    For every non-NaN cell of the frame returned by ``find_events`` a
    'Buy,100' order is written on the event date and a 'Sell,100' order
    five timestamps later (or on the last timestamp when fewer remain).
    Orders go to the file named by ``orderFile``.

    Args:
        ldt_timestamps: timestamps to read data for.
        symbols_list: name of the symbol list; 'SPY' is appended to it.
        event_amount: value forwarded to ``find_events``.
    """
    dataobj = da.DataAccess('Yahoo')
    ls_symbols = dataobj.get_symbols_from_list(symbols_list)
    ls_symbols.append('SPY')

    ls_keys = ['close', 'actual_close']
    ldf_data = dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    # Fill NaN forward, then backward, then with 1.0.
    for s_key in ls_keys:
        d_data[s_key] = (d_data[s_key]
                         .fillna(method='ffill')
                         .fillna(method='bfill')
                         .fillna(1.0))

    df_events = find_events(ls_symbols, d_data, event_amount)

    i_last = len(ldt_timestamps) - 1
    # The with-block closes the file even if writing an order fails.
    with open(orderFile, "w") as file_out:
        for col in df_events.columns:
            for i, dt_buy in enumerate(ldt_timestamps):
                if np.isnan(df_events.get_value(dt_buy, col)):
                    continue
                # Sell five timestamps later, clamped to the last one.
                dt_sell = ldt_timestamps[min(i + 5, i_last)]
                file_out.write(
                    dt_buy.strftime('%Y,%m,%d') + "," + str(col) + ",Buy,100\n")
                file_out.write(
                    dt_sell.strftime('%Y,%m,%d') + "," + str(col) + ",Sell,100\n")
def plot():
    """Plot cumulative returns of each symbol and of the optimal portfolio.

    Uses ``dt_start``, ``dt_end`` and ``ls_symbols`` from the enclosing
    scope and the allocation returned by ``optimal_allocation_4``.
    """
    ls_alloc = optimal_allocation_4(dt_start, dt_end, ls_symbols)

    dt_timeofday = dt.timedelta(hours=16)
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)
    c_dataobj = da.DataAccess('Yahoo')
    ls_key = ['close']
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_key)
    d_data = dict(zip(ls_key, ldf_data))

    df_rets = d_data['close'].copy()
    df_rets = df_rets.fillna(method='ffill').fillna(method='bfill')

    # returnize0 is called for its in-place effect on the values.
    na_rets = df_rets.values
    tsu.returnize0(na_rets)

    na_portrets = np.sum(na_rets * ls_alloc, axis=1)
    na_port_total = np.cumprod(na_portrets + 1)
    na_component_total = np.cumprod(na_rets + 1, axis=0)

    plt.clf()
    fig = plt.figure()
    fig.add_subplot(111)
    plt.plot(ldt_timestamps, na_component_total, alpha=0.4)
    plt.plot(ldt_timestamps, na_port_total)
    # Build a separate legend list: appending to ls_symbols itself would add
    # 'Portfolio' to the shared symbol list on every call.
    ls_names = list(ls_symbols) + ['Portfolio']
    plt.legend(ls_names)
    plt.ylabel('Cumulative Returns')
    plt.xlabel('Date')
    fig.autofmt_xdate(rotation=45)
def getTestData():
    """Build test data from the 'ML4T-130' series.

    Reads all price keys from the 'ML4Trading' source for 2000-02-01 to
    2012-09-14, fills NaN (forward, backward, then 1.0) and passes the
    'actual_close' values to ``train``.  The number of rows and both results
    are printed.

    Returns:
        ``(Xtest, Ytest)``: the first and third values returned by ``train``.
    """
    names = ['ML4T-130']
    trading_days = du.getNYSEdays(
        dt.datetime(2000, 2, 1),
        dt.datetime(2012, 9, 14),
        dt.timedelta(hours=16),
    )
    source = da.DataAccess('ML4Trading')
    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    frames = dict(zip(fields, source.get_data(trading_days, names, fields)))

    for field in fields:
        frames[field] = (frames[field]
                         .fillna(method='ffill')
                         .fillna(method='bfill')
                         .fillna(1.0))

    actual_close = frames['actual_close'].values
    print(len(actual_close))
    Xtest, _, Ytest = train(actual_close)
    print(Xtest)
    print(Ytest)
    return Xtest, Ytest
def stock_price(dt_start, dt_end, ls_symbols):
    """Return close prices normalized by their first row.

    All price keys are read from the 'Yahoo' source and NaN are filled
    forward and then backward (no constant fill).  Only 'close' is used for
    the result.
    """
    # Closing prices are wanted, hence the 16:00 time of day.
    trading_days = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))
    source = da.DataAccess('Yahoo')

    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    frames = dict(zip(fields, source.get_data(trading_days, ls_symbols, fields)))

    for field in fields:
        frames[field] = (frames[field]
                         .fillna(method='ffill')
                         .fillna(method='bfill'))

    close = frames['close'].values
    return close / close[0, :]
def marketsim(cash, orders_file, data_item):
    """Replay the orders in a CSV file and record the portfolio value per day.

    Args:
        cash: starting cash handed to ``pf.PortfolioPortfolio``.
        orders_file: CSV with rows ``year, month, day, symbol, action, num``.
        data_item: data key passed to ``get_data``.

    Returns:
        List of ``(year, month, day, value)`` tuples, one per timestamp
        between the first order date and the day after the last one.
    """
    # Read the orders, grouped by date; the file is closed once parsed.
    orders = defaultdict(list)
    symbols = set()
    with open(orders_file, "rU") as orders_con:
        for year, month, day, sym, action, num in csv.reader(orders_con):
            orders[date(int(year), int(month), int(day))].append(
                (sym, action, int(num)))
            symbols.add(sym)

    days = sorted(orders)
    first, last = days[0], days[-1]
    # Date arithmetic rolls over month and year ends; building the date
    # from day + 1 fails on the last day of a month.
    after_last = last + timedelta(days=1)

    timestamps = du.getNYSEdays(
        dt.datetime(first.year, first.month, first.day),
        dt.datetime(after_last.year, after_last.month, after_last.day),
        timedelta(hours=16))

    dataobj = da.DataAccess('Yahoo', cachestalltime=0)
    close = dataobj.get_data(timestamps, symbols, data_item)

    values = []
    portfolio = pf.PortfolioPortfolio(cash)
    for i, t in enumerate(timestamps):
        for sym, action, num in orders[date(t.year, t.month, t.day)]:
            # Sells are applied as negative quantities.
            if action == 'Sell':
                num *= -1
            portfolio.update(sym, num, close[sym][i])
        values.append((t.year, t.month, t.day, portfolio.value(close, i)))
    return values
def simulate_failed2(date_start, date_end, symbols, allocations):
    '''rebalances daily

    Weights each symbol's normalized close series by its allocation, turns
    every weighted series into daily returns and sums them per day, skipping
    NaN entries.  Intermediate values are printed along the way.

    Returns ``(vol, daily_ret, sharpe, cum_ret)`` where ``cum_ret`` is the
    product of ``1 + daily portfolio return``.
    '''
    close_time = dt.timedelta(hours=16)
    trading_days = du.getNYSEdays(date_start, date_end, close_time)
    source = da.DataAccess('Yahoo')
    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    frames = dict(zip(fields, source.get_data(trading_days, symbols, fields)))

    close = frames['close'].values
    normalized = close / close[0, :]
    print(normalized)

    # One allocation-weighted series per symbol position.
    weighted = {}
    for position in range(len(allocations)):
        weighted[position] = normalized[:, position] * allocations[position]
    print(weighted)

    # returnize0 is called for its in-place effect on each series.
    for position in weighted:
        tsu.returnize0(weighted[position])
    print(weighted)

    day_sums = []
    for day in range(len(weighted[0])):
        running = 0.0
        print("Index %s" % day)
        for position in weighted:
            print("sec %s" % position)
            if np.isnan(weighted[position][day]):
                print("Passing: [%s] [%s]" % (position, day))
            else:
                running += weighted[position][day]
        day_sums.append(running)

    portfolio_rets = np.array(day_sums)
    total = np.prod(portfolio_rets + 1)
    print(portfolio_rets)

    risk_free = 0
    volatility = np.std(portfolio_rets)
    mean_ret = np.average(portfolio_rets)
    sharpe_ratio = np.sqrt(252) * ((mean_ret - risk_free) / volatility)
    return volatility, mean_ret, sharpe_ratio, total
def main():
    """Run the Bollinger event study for the 'sp5002012' list plus 'SPY'.

    Finds events for 2008-01-01 to 2009-12-31, writes the event profile to
    'BollingerEventStudy12.9.pdf' and the generated orders to
    'ordersbollinger5d.csv'.
    """
    source = da.DataAccess('Yahoo')
    tickers = source.get_symbols_from_list("sp5002012")
    tickers.append('SPY')

    lookback = 20
    study = BasicPortfolio(
        tickers, dt.datetime(2008, 1, 1), dt.datetime(2009, 12, 31))

    print('Start Looking for the events')
    events = BollingerEventTest(study, lookback)

    print('Start retrieving data from local Yahoo')
    market_data = load_local_data_from_yahoo(
        study.start_date, study.end_date, study.tickers)

    report_name = "BollingerEventStudy12.9.pdf"
    ep.eventprofiler(
        events,
        market_data,
        i_lookback=20,
        i_lookforward=20,
        s_filename=report_name,
        b_market_neutral=True,
        b_errorbars=True,
        s_market_sym='SPY',
    )

    print('Generate orders with the events')
    trades = GenerateTradingWithEvents(events)
    trades.to_csv("ordersbollinger5d.csv", index=False, header=False)
def do_benchmark_calculations(start_date, end_date, ls_symbols):
    """Print Sharpe ratio, total return, standard deviation and average
    daily return of the benchmark's gap-filled close prices.

    Nothing is returned; the four statistics are printed.
    """
    # Closing prices are wanted, hence the 16:00 time of day.
    trading_days = du.getNYSEdays(start_date, end_date, dt.timedelta(hours=16))
    source = da.DataAccess('Yahoo', cachestalltime=0)
    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    frames = dict(zip(fields, source.get_data(trading_days, ls_symbols, fields)))

    close = (frames['close']
             .fillna(method='ffill')
             .fillna(method='bfill')
             .fillna(1.0))

    prices = close.values.copy()
    returns = prices / prices[0:1]
    # returnize0 is called for its in-place effect.
    tsu.returnize0(returns)

    total_return = np.cumprod(returns + 1)[-1]
    mean_ret = returns.mean()
    deviation = returns.std()

    print("benchmark sharpe_ratio: %s" % (mean_ret / deviation * math.sqrt(252),))
    print("benchmark total return: %s" % (total_return,))
    print("benchmark standard deviation %s" % (deviation,))
    print("benchmark average return %s" % (mean_ret,))
def process_benchmark(dt_start, dt_end, benchmark_symbol):
    """Compute return statistics for a single benchmark symbol.

    One day is added to ``dt_end`` before the trading days are looked up.

    Returns:
        ``(dt_start, dt_end, sharpe, total_return, stddev, avg_return)``
        where the two dates are the first and last timestamps actually used
        and the Sharpe ratio is scaled by sqrt(252).
    """
    trading_days = du.getNYSEdays(
        dt_start, dt_end + dt.timedelta(days=1), dt.timedelta(hours=16))
    first_day, last_day = trading_days[0], trading_days[-1]

    source = da.DataAccess('Yahoo')
    fields = ['close']
    frames = dict(zip(
        fields, source.get_data(trading_days, [benchmark_symbol], fields)))
    for field in fields:
        frames[field] = (frames[field]
                         .fillna(method='ffill')
                         .fillna(method='bfill')
                         .fillna(1.0))

    prices = frames['close'].values
    # Normalize to start at 1; the division yields a fresh array which
    # returnize0 then overwrites with daily returns.
    daily = prices / prices[0, :]
    tsu.returnize0(daily)

    average = np.mean(daily)
    total = np.prod(daily + 1.0)
    deviation = np.std(daily)
    sharpe = math.sqrt(252.0) * average / deviation
    return first_day, last_day, sharpe, total, deviation, average
def fetchNYSEData(dt_start, dt_end, ls_symbols):
    """Return gap-filled close values and the trading-day timestamps.

    All price keys are read from the 'Yahoo' source (cache disabled via
    ``cachestalltime=0``) and NaN are filled forward, backward, then with
    1.0.

    Returns:
        ``(na_price, ldt_timestamps)``: the 'close' values and the list of
        timestamps they were requested for.
    """
    # The time of closing is 1600 hrs.
    trading_days = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))
    source = da.DataAccess('Yahoo', cachestalltime=0)

    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    frames = dict(zip(fields, source.get_data(trading_days, ls_symbols, fields)))

    for field in fields:
        frames[field] = (frames[field]
                         .fillna(method='ffill')
                         .fillna(method='bfill')
                         .fillna(1.0))

    return frames['close'].values, trading_days
def getTestData292():
    """Build test data from the 'ML4T-292' series.

    Reads all price keys from the 'Yahoo' source for 2006-01-01 to
    2007-12-31, fills NaN (forward, backward, then 1.0) and passes the
    'actual_close' values to ``train``.

    Returns:
        ``(Xtest, Ytest)``: the first and third values returned by ``train``.
    """
    names = ['ML4T-292']
    trading_days = du.getNYSEdays(
        dt.datetime(2006, 1, 1),
        dt.datetime(2007, 12, 31),
        dt.timedelta(hours=16),
    )
    source = da.DataAccess('Yahoo')
    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    frames = dict(zip(fields, source.get_data(trading_days, names, fields)))

    for field in fields:
        frames[field] = (frames[field]
                         .fillna(method='ffill')
                         .fillna(method='bfill')
                         .fillna(1.0))

    Xtest, _, Ytest = train(frames['actual_close'].values)
    return Xtest, Ytest
def _generate_data(self):
    """Prepare close data, a monthly allocation frame and the expected result.

    Sets ``self.df_close`` (the 'close' data for '$SPX' from the 'Norgate'
    source, December 2008 through January 2010), ``self.df_alloc`` (one row
    for the first of each month of 2009 with '$SPX' at -1 and '_CASH' at
    0.0) and ``self.i_open_result``.
    """
    year = 2009
    startday = dt.datetime(year - 1, 12, 1)
    endday = dt.datetime(year + 1, 1, 31)
    l_symbols = ['$SPX']

    # Get desired timestamps.
    timeofday = dt.timedelta(hours=16)
    ldt_timestamps = du.getNYSEdays(startday, endday, timeofday)

    dataobj = da.DataAccess('Norgate')
    self.df_close = dataobj.get_data(
        ldt_timestamps, l_symbols, "close", verbose=True)

    # Build all twelve monthly rows in one frame rather than appending one
    # single-row frame per month.
    ldt_alloc = [dt.datetime(year, i_month, 1) for i_month in range(1, 13)]
    self.df_alloc = pand.DataFrame(
        index=ldt_alloc, data=[-1] * len(ldt_alloc), columns=l_symbols)
    self.df_alloc['_CASH'] = 0.0

    # Based on hand calculation using the transaction costs and slippage.
    self.i_open_result = 0.7541428779600005
def get_close_data(start_date, end_date, symbols):
    """
    Returns the gap-filled 'close' values of the symbols passed in

    @param start_date: start date to grab data from
    @param end_date: end date to grab data from
    @param symbols: symbols to get data for
    @return: the 'close' values and the list of trading-day timestamps
    """
    # Trading days in the range, stamped at 16:00.
    close_time = dt.timedelta(hours=16)
    trading_days = du.getNYSEdays(start_date, end_date, close_time)

    # Yahoo is the data source.
    source = da.DataAccess('Yahoo')
    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    frames = dict(zip(fields, source.get_data(trading_days, symbols, fields)))

    # Fill NaN forward, then backward, then with 1.0.
    for field in fields:
        frames[field] = (frames[field]
                         .fillna(method='ffill')
                         .fillna(method='bfill')
                         .fillna(1.0))

    return frames['close'].values, trading_days
def getTrainData():
    """Build training data from the series 'ML4T-000' through 'ML4T-199'.

    Reads all price keys from the 'Yahoo' source for 2001-01-01 to
    2005-12-31, fills NaN (forward, backward, then 1.0) and passes the
    'actual_close' values to ``train``.

    Returns:
        ``(Xtrain, Ytrain)``: the first two values returned by ``train``.
    """
    # Zero-padded to three digits: ML4T-000 ... ML4T-199.
    names = ['ML4T-%03d' % number for number in range(200)]

    trading_days = du.getNYSEdays(
        dt.datetime(2001, 1, 1),
        dt.datetime(2005, 12, 31),
        dt.timedelta(hours=16),
    )
    source = da.DataAccess('Yahoo')
    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    frames = dict(zip(fields, source.get_data(trading_days, names, fields)))

    for field in fields:
        frames[field] = (frames[field]
                         .fillna(method='ffill')
                         .fillna(method='bfill')
                         .fillna(1.0))

    Xtrain, Ytrain, _ = train(frames['actual_close'].values)
    return Xtrain, Ytrain
def prices(symbol='$DJI', start=datetime.datetime(2008,1,1), end=datetime.datetime(2009,12,31)):
    """Return the 'close' values of a single symbol as a 1-D array.

    Arguments:
      symbol (str): Ticker symbol; upper-cased before the lookup.
      start (datetime): Start of the period; a falsy value means 2008-01-01.
      end (datetime): End of the period; a falsy value means 2009-12-31.
    """
    first_day = util.normalize_date(start or datetime.date(2008, 1, 1))
    last_day = util.normalize_date(end or datetime.date(2009, 12, 31))
    ticker = symbol.upper()

    trading_days = du.getNYSEdays(
        first_day, last_day, datetime.timedelta(hours=16))
    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    frames = dict(zip(fields, da.get_data(trading_days, [ticker], fields)))

    # Only one symbol was requested, so take the first column.
    return frames['close'].values[:, 0]
def findEvents(symbols_year, startday, endday, event, data_item="close"):
    """Build an event matrix for the 'sp500<symbols_year>' list plus 'SPY'.

    The matrix starts as a deep copy of the data that was read with every
    cell set to NAN.  ``event`` is then called once per symbol as
    ``event(eventmat, symbol, close[symbol], timestamps)`` and is expected
    to mark its events in the matrix.

    Returns the event matrix.
    """
    source = DataAccess('Yahoo')
    tickers = source.get_symbols_from_list("sp500%d" % symbols_year)
    tickers.append('SPY')

    trading_days = getNYSEdays(startday, endday, timedelta(hours=16))

    print("# reading data")
    market = source.get_data(trading_days, tickers, data_item)

    print("# finding events")
    matrix = copy.deepcopy(market)
    # Blank out every cell before the event function runs.
    for ticker in tickers:
        for stamp in trading_days:
            matrix[ticker][stamp] = NAN

    for ticker in tickers:
        event(matrix, ticker, market[ticker], trading_days)
    return matrix
def chart_series(series, market_sym='$SPX', price='actual_close', normalize=True):
    """Display a graph of the series alongside reference market symbols

    Arguments:
      series (dataframe, list of str, or list of tuples):
        dataframe (Timestamp or Datetime for index)
          other columns are float y-axis values to be plotted
        list of str: 1st 3 comma or slash-separated integers are the year, month, day
          others are float y-axis values
        list of tuples: 1st 3 integers are year, month, day
          others are float y-axis values
      market_sym (str): ticker symbol of equity or commodity to plot alongside the series
      price (str): which market data value ('close', 'actual_close', 'volume', etc) to use
        for the market symbol for comparison to the series
      normalize (bool): Whether to normalize prices to 1 at the start of the time series.
        Currently not used by the body.

    Returns:
      The dataframe that was plotted, including one column per market symbol.
    """
    series = util.make_dataframe(series)
    start = util.normalize_date(series.index[0] or datetime.datetime(2008, 1, 1))
    end = util.normalize_date(series.index[-1] or datetime.datetime(2009, 12, 28))
    timestamps = du.getNYSEdays(start, end, datetime.timedelta(hours=16))

    if market_sym:
        if isinstance(market_sym, basestring):
            market_sym = [market_sym.upper().strip()]
        reference_prices = da.get_data(timestamps, market_sym, [price])[0]
        # Look each symbol's column up by name: iterating the frame yields
        # its column labels, not the price data.
        for sym in market_sym:
            series[sym] = pd.Series(reference_prices[sym], index=timestamps)

    series.plot()
    plt.grid(True)
    plt.show()
    return series
def chart(
    symbols=("AAPL", "GLD", "GOOG", "$SPX", "XOM", "msft"),
    start=datetime.datetime(2008, 1, 1),
    end=datetime.datetime(2009, 12, 31),  # data stops at 2013/1/1
    normalize=True,
    ):
    """Display a graph of the price history for the list of ticker symbols provided

    The plot is also saved to 'chart.pdf'.

    Arguments:
      symbols (list of str): Ticker symbols like "GOOG", "AAPL", etc
      start (datetime): The date at the start of the period being analyzed.
      end (datetime): The date at the end of the period being analyzed.
      normalize (bool): Whether to normalize prices to 1 at the start of the time series.

    Returns:
      The plotted 'close' values (normalized when requested).
    """
    start = util.normalize_date(start or datetime.date(2008, 1, 1))
    end = util.normalize_date(end or datetime.date(2009, 12, 31))
    symbols = [s.upper() for s in symbols]
    timeofday = datetime.timedelta(hours=16)
    timestamps = du.getNYSEdays(start, end, timeofday)

    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = da.get_data(timestamps, symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    na_price = d_data['close'].values
    if normalize:
        # Divide into a new array: an in-place division by the array's own
        # first row is order-dependent on older NumPy and also rewrites the
        # values held by the source frame.
        na_price = na_price / na_price[0, :]

    plt.clf()
    plt.plot(timestamps, na_price)
    plt.legend(symbols)
    plt.ylabel('Adjusted Close')
    plt.xlabel('Date')
    plt.savefig('chart.pdf', format='pdf')
    plt.grid(True)
    plt.show()
    return na_price
def portfolio_prices(
    symbols=("AAPL", "GLD", "GOOG", "$SPX", "XOM", "msft"),
    start=datetime.datetime(2005, 1, 1),
    end=datetime.datetime(2011, 12, 31),  # data stops at 2013/1/1
    normalize=True,
    allocation=None,
    price_type='actual_close',
    ):
    """Calculate the allocation-weighted price series of a portfolio

    Arguments:
      symbols (list of str): Ticker symbols like "GOOG", "AAPL", etc
      start (datetime): The date at the start of the period being analyzed.
      end (datetime): The date at the end of the period being analyzed.
      normalize (bool): Whether to normalize prices to 1 at the start of the time series.
      allocation (list of float): The portion of the portfolio allocated to each equity.
        Defaults to equal weights; a short list is padded with 1/len(symbols)
        entries.  The weights are rescaled to sum to 1.
      price_type (str): Which market data value to read ('actual_close', 'close', ...).

    Returns:
      1-D array with the sum of the weighted prices for each timestamp.
    """
    symbols = normalize_symbols(symbols)
    start = util.normalize_date(start)
    end = util.normalize_date(end)

    if allocation is None:
        allocation = [1. / len(symbols)] * len(symbols)
    if len(allocation) < len(symbols):
        allocation = list(allocation) + [1. / len(symbols)] * (len(symbols) - len(allocation))
    # Sum the weights themselves; a list has no ``sum`` attribute.
    total = float(np.sum(allocation))
    allocation = np.array([(float(a) / total) for a in allocation])

    timestamps = du.getNYSEdays(start, end, datetime.timedelta(hours=16))
    ls_keys = [price_type]
    ldf_data = da.get_data(timestamps, symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    na_price = d_data[price_type].values
    # Out-of-place arithmetic leaves the source frame's values untouched and
    # avoids dividing an array by a view of itself.
    if normalize:
        na_price = na_price / na_price[0, :]
    na_price = na_price * allocation
    return np.sum(na_price, axis=1)