def dataframeToData(data, length):
    """Convert a price DataFrame into sliding-window arrays for a model.

    Steps, in order:

    1. Take ``data.values``, reverse the row order and drop the first row of
       the reversed array.
    2. Express every cell as its fractional change relative to the row two
       positions earlier, then min-max scale those changes column by column.
    3. Cut the scaled changes into overlapping windows of ``length`` rows and
       pair each window with column 0 of the row that follows it.

    :param data: DataFrame of price rows, newest first (the rows are reversed
        so that time increases). Column 0 supplies the targets -- presumably
        the open price, judging by the variable names; confirm with callers.
    :param length: number of rows in each window.
    :return: tuple ``(ohlcv_data, open_data_normal, indic_data, y_normaliser,
        real_data, current)``:

        * ``ohlcv_data`` -- array of scaled-change windows.
        * ``open_data_normal`` -- scaled column-0 change following each
          window, as a column vector.
        * ``indic_data`` -- ``get_indicators(window, 14)`` for each window.
        * ``y_normaliser`` -- ``MinMaxScaler`` fitted on the unscaled
          column-0 changes that follow each window.
        * ``real_data`` -- raw column-0 values from row ``length`` onward.
        * ``current`` -- two-item list: the last ``length`` scaled rows and
          their indicators.

    NOTE(review): ``real_data`` is indexed against the raw rows while the
    windows are indexed against the change rows (which start two rows later),
    so the two are not index-aligned -- confirm this is what callers expect.
    """
    rows = data.values[::-1]  # flip so time increases with the row index
    rows = rows[1:]  # drop the first day (often an outlier for recent IPOs)

    # Scaler fitted on the raw rows; its output is not used further below.
    data_normaliser = preprocessing.MinMaxScaler()
    data_normalised = data_normaliser.fit_transform(rows)

    # Fractional change of every column against the row two steps earlier.
    percent_data = []
    for i in range(2, len(rows)):
        earlier = rows[i - 2]
        percent_data.append([(rows[i][j] - earlier[j]) / earlier[j]
                             for j in range(len(rows[0]))])

    percent_normaliser = preprocessing.MinMaxScaler()
    percent_normalised = percent_normaliser.fit_transform(percent_data)

    window_starts = range(len(percent_normalised) - length)

    ohlcv_data = np.array(
        [percent_normalised[start:start + length] for start in window_starts])

    # Scaled target: column 0 of the row right after each window.
    next_scaled = np.array(
        [percent_normalised[start + length][0] for start in window_starts])
    next_scaled = np.expand_dims(next_scaled, -1)
    n_targets, n_target_cols = next_scaled.shape[0], next_scaled.shape[1]
    open_data_normal = np.reshape(next_scaled, (n_targets, n_target_cols))

    # Unscaled targets are only used to fit the scaler handed to the caller.
    open_data = np.array(
        [percent_data[start + length][0]
         for start in range(len(percent_data) - length)])
    open_data = np.expand_dims(open_data, -1)
    y_normaliser = preprocessing.MinMaxScaler()
    y_normaliser.fit(open_data)

    indic_data = np.array([get_indicators(window, 14) for window in ohlcv_data])

    # Most recent window plus its indicators.
    total_rows = len(percent_normalised)
    latest_window = percent_normalised[total_rows - length:total_rows]
    current = [latest_window]
    current.append(get_indicators(latest_window, 14))

    real_data = np.array(
        [rows[start + length][0] for start in range(len(rows) - length)])

    return (ohlcv_data, open_data_normal, indic_data, y_normaliser, real_data,
            current)
def prepare_data_for_metrics(portf_value, symbol):
    """
    Build scaled Momentum/RSI features and a scaled price target, then split
    them (without shuffling) into training and testing sets.

    :param portf_value: Dataframe with prices; an ``'Adj Close'`` column is
        renamed to ``symbol``. The scaled columns are relabelled by position
        (0 -> symbol, 1 -> Momentum, 2 -> RSI), which assumes the frame holds
        a single price column -- TODO confirm against callers.
    :param symbol: Stock symbol
    :return: ``X_train, X_test, y_train, y_test`` where X holds the scaled
        ``Momentum`` and ``RSI`` columns and y the scaled price column; the
        last 30% of rows form the test set
    """
    # Work on a copy indexed by date so the caller's frame is untouched.
    normed = portf_value.copy()
    normed['date'] = portf_value.index
    normed.set_index('date', inplace=True)
    normed.rename(columns={'Adj Close': symbol}, inplace=True)

    # Get indicators (only momentum and RSI are used as features).
    sym_mom, _sma, _q, rsi_value = get_indicators(normed, symbol)

    # Create momentum column
    normed['Momentum'] = sym_mom

    # Create RSI column
    normed['RSI'] = rsi_value

    # Clean nan values
    normed = normed.fillna(0)

    # Sort dataframe by index. sort_index() returns a new frame, so the result
    # has to be kept; otherwise the unshuffled split below would follow
    # whatever order the input happened to be in.
    normed = normed.sort_index()

    # Normalize every column into [0, 1].
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(normed)

    # The scaler returns a bare array: rebuild a frame and restore the names.
    df_normed = pd.DataFrame(dataset,
                             index=range(dataset.shape[0]),
                             columns=range(dataset.shape[1]))
    df_normed.rename(columns={0: symbol, 1: 'Momentum', 2: 'RSI'},
                     inplace=True)

    # Define X and y
    feature_cols = ['Momentum', 'RSI']
    X = df_normed[feature_cols]
    y = df_normed[symbol]

    # shuffle=False keeps the split chronological.
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.30,
                                                        shuffle=False)
    return X_train, X_test, y_train, y_test
def testPolicy(self,
               symbol="JPM",
               sd=dt.datetime(2008, 1, 1),
               ed=dt.datetime(2010, 12, 31),
               sv=100000,
               boll_bandr_up=0.8,
               boll_bandr_low=0.2,
               simple_moving_averager_up=1.05,
               simple_moving_averager_low=0.95):
    """Rule-based strategy: derive daily positions and turn them into trades.

    For each trading day the position is:

    * ``0`` on the first day, and on any day whose ``stdev_divergence``
      exceeds 0.05 (too volatile for the indicators to be trusted);
    * ``-1`` when both ``boll_bandr`` and ``simple_moving_averager`` are
      above their upper thresholds (two sell signals);
    * ``1`` when both are below their lower thresholds (oversold, go long);
    * ``0`` otherwise.

    :param symbol: ticker to trade.
    :param sd: start of the date range.
    :param ed: end of the date range.
    :param sv: starting value; not used by this method.
    :param boll_bandr_up: upper threshold for ``boll_bandr``.
    :param boll_bandr_low: lower threshold for ``boll_bandr``.
    :param simple_moving_averager_up: upper threshold for the SMA ratio.
    :param simple_moving_averager_low: lower threshold for the SMA ratio.
    :return: result of ``self.generate_orders(positions)`` as a one-column
        frame named ``symbol``.
    """
    # Grab historical data
    date_range = pd.date_range(sd, ed)
    prices, index_prices, trading_days = self.retrieve_price_data(
        symbol, date_range)

    # Indicators
    indicators = get_indicators(prices.to_frame(symbol))
    band_ratio = indicators["boll_bandr"]
    sma_ratio = indicators["simple_moving_averager"]
    volatility = indicators["stdev_divergence"]

    # Trading positions (strategy)
    df_positions = pd.Series(index=trading_days)
    for day in df_positions.index:
        prior_offset = df_positions.index.get_loc(day) - 1

        # First trading day: nothing to act on yet.
        if prior_offset < 0:
            df_positions.loc[day] = 0
            continue
        if not prior_offset >= 0:
            raise Exception("Error logic")

        if volatility.loc[day] > 0.05:
            # Too volatile: the indicators are meaningless, stay out.
            signal = 0
        elif (band_ratio.loc[day] > boll_bandr_up
              and sma_ratio.loc[day] > simple_moving_averager_up):
            # Two sell signals => short.
            signal = -1
        elif (band_ratio.loc[day] < boll_bandr_low
              and sma_ratio.loc[day] < simple_moving_averager_low):
            # Stock may be oversold => long.
            signal = 1
        else:
            signal = 0
        df_positions.loc[day] = signal

    # Positions to orders
    df_trades = self.generate_orders(df_positions)
    return df_trades.to_frame(symbol)
def testPolicy(symbol=['AAPL'],
               sd=dt.datetime(2010, 1, 1),
               ed=dt.datetime(2011, 12, 31),
               sv=100000):
    """Generate BUY/SELL orders for one stock from RSI and Bollinger signals.

    Walking through the trading days in order:

    * BUY when ``RSI_SMA`` > 60, ``bb value`` > 0.25 and holdings are below
      +1000 shares;
    * SELL when ``RSI_SMA`` < 40, ``bb value`` < -0.25 and holdings are above
      -1000 shares.

    An order trades 1000 shares when currently flat and 2000 shares when
    reversing an existing position.

    :param symbol: ticker to trade, as a string or as a one-element
        list/tuple (the historical default is ``['AAPL']``).
    :param sd: start of the date range.
    :param ed: end of the date range.
    :param sv: starting value; not used by this function.
    :return: DataFrame indexed by date with columns ``Symbol``, ``Order``
        (``'BUY'``/``'SELL'``) and ``Shares``, containing only the days on
        which an order was placed.
    :raises ValueError: if ``symbol`` is a list/tuple that does not hold
        exactly one ticker.
    """
    # The default is a one-element list, but everything below needs a scalar
    # ticker: wrapping a list in another list and indexing the price frame
    # with it yields a DataFrame, on which the normalisation below fails.
    if isinstance(symbol, (list, tuple)):
        if len(symbol) != 1:
            raise ValueError(
                "testPolicy trades exactly one symbol, got {!r}".format(
                    symbol))
        symbol = symbol[0]

    df_prices = get_data([symbol], pd.date_range(sd, ed))
    prices = df_prices[symbol]
    # Normalize to 1.0; the division yields a Series, so convert it back to a
    # one-column frame for get_indicators.
    prices = (prices / prices.iloc[0]).to_frame()

    indicators = get_indicators(prices, symbol)
    BB_value = indicators['bb value']
    RSI_SMA = indicators['RSI_SMA']

    RSI_SMA_top = 60
    RSI_SMA_bottom = 40
    BB_value_top = 0.25
    BB_value_bottom = -0.25

    # One row per trading day; rows without an order are filtered out at the
    # end. Shares is kept as float to match the dtype this function has
    # always returned.
    trades = pd.DataFrame({'Symbol': symbol, 'Order': '', 'Shares': 0.0},
                          index=prices.index,
                          columns=['Symbol', 'Order', 'Shares'])

    holdings = 0
    for day in prices.index:
        current_rsi = RSI_SMA.loc[day]
        current_bb = BB_value.loc[day]

        if (current_rsi > RSI_SMA_top and current_bb > BB_value_top
                and holdings < 1000):
            quantity = 1000 if holdings == 0 else 2000
            # Single .loc[row, col] assignment: chained .loc[row][col] may
            # write to a temporary copy and silently lose the order.
            trades.loc[day, 'Order'] = 'BUY'
            trades.loc[day, 'Shares'] = quantity
            holdings += quantity
        elif (current_rsi < RSI_SMA_bottom and current_bb < BB_value_bottom
              and holdings > -1000):
            quantity = 1000 if holdings == 0 else 2000
            trades.loc[day, 'Order'] = 'SELL'
            trades.loc[day, 'Shares'] = quantity
            holdings -= quantity

    return trades[trades.Shares != 0]
def preprocess_data(df):
    """Index the frame by date when needed and compute its indicators.

    If the first column of ``df`` is named ``'Date'`` it becomes the index;
    otherwise the frame is used as given. The frame is then passed to
    ``indicators.get_indicators`` and that result is returned.
    """
    first_column = df.columns[0]
    frame = df.set_index('Date') if first_column == 'Date' else df
    return indicators.get_indicators(frame)
import csv
import sys

import matplotlib.pyplot as plt

import indicators
import marketsim

# Day and month are written without leading zeros: ``01`` is an octal literal
# in Python 2 and a SyntaxError in Python 3, while ``1`` is valid in both.
start_date = dt.datetime(2006, 1, 1)
end_date = dt.datetime(2009, 12, 31)
# start_date = dt.datetime(2010, 1, 1)
# end_date = dt.datetime(2010, 12, 31)

symbols = ['IBM']
lookback = 14

# Compute every indicator once, then expose each series under its own name.
indicatorsDict = indicators.get_indicators(symbols, start_date, end_date,
                                           lookback)
smaR = indicatorsDict['smaR']
price = indicatorsDict['price']
sma = indicatorsDict['sma']
bbp = indicatorsDict['bbp']
rolling_std = indicatorsDict['rolling_std']
top_band = indicatorsDict['top_band']
bottom_band = indicatorsDict['bottom_band']
rsi = indicatorsDict['rsi']
daily_rets = indicatorsDict['daily_rets']
up_gain = indicatorsDict['up_gain']
down_loss = indicatorsDict['down_loss']
rs = indicatorsDict['rs']
mom = indicatorsDict['mom']

### Use the four indicators to make some kind of trading decision for each day.
def test_Manual(data='test', graph=True):
    """Run the rule-based IBM strategy on one date window and plot it.

    :param data: ``'test'`` selects 2010-01-01..2010-12-31 with benchmark
        file ``test.csv``; any other value selects 2006-01-01..2009-12-31
        with ``train.csv``.
    :param graph: when true, plot the normalised portfolio against the
        benchmark and draw a vertical line at each order (green for a BUY
        from flat, red for a SELL from flat, black otherwise).
    :return: ``(norm_portvals, norm_benchvals)`` as produced by
        ``helpers.get_norm_data``.
    """
    # Dates use plain ``1``: a leading-zero literal such as ``01`` is a
    # SyntaxError on Python 3.
    if data == 'test':
        start_date = dt.datetime(2010, 1, 1)
        end_date = dt.datetime(2010, 12, 31)
        benchmark_file = 'test.csv'
    else:
        start_date = dt.datetime(2006, 1, 1)
        end_date = dt.datetime(2009, 12, 31)
        benchmark_file = 'train.csv'

    display = 'Momentum'  # indicator shown by the optional second figure
    graph_extra = False
    symbols = ['IBM']
    # symbols = ['HD']
    lookback = 14
    startval = 100000

    indicator_data = indicators.get_indicators(symbols, start_date, end_date,
                                               lookback)
    orders = build_orders(indicator_data)
    portvals = helpers.compute_portvals(orders, start_date, end_date, startval)
    benchvals = helpers.compute_portvals2(benchmark_file, startval)

    norm_portvals = helpers.get_norm_data(portvals)
    norm_benchvals = helpers.get_norm_data(benchvals)

    if graph:
        plt.figure(0)
        df_temp = pd.concat([norm_portvals, norm_benchvals],
                            keys=['Portfolio', 'Benchmark'],
                            axis=1)
        # Pin the first portfolio value to 1 so the forward fill below has a
        # starting point (.iloc replaces the removed positional .ix).
        df_temp.iloc[0, 0] = 1
        df_temp['Portfolio'] = df_temp['Portfolio'].ffill()

        plt.rc('axes', prop_cycle=(cycler('color', ['blue', 'black'])))
        plt.plot(df_temp)
        plt.legend(['Portfolio', 'Benchmark'], loc='upper left')
        plt.xticks(rotation=45)
        plt.ylabel('Price')
        plt.xlabel('Date')
        plt.grid()
        plt.title('Rule-based Portfolio vs. IBM')

        # Mark each order: entries from flat are coloured by direction,
        # anything else is treated as an exit back to flat.
        position = 'out'
        for order in orders.index:
            day = orders.loc[order, 'Date']
            side = orders.loc[order, 'Order']
            if position == 'out' and side == 'BUY':
                color = 'green'
                position = 'long'
            elif position == 'out' and side == 'SELL':
                color = 'red'
                position = 'short'
            else:
                color = 'black'
                position = 'out'
            plt.axvline(day, color=color, linewidth=2)

    if graph_extra:
        plt.figure(1)
        df_temp = pd.concat([indicator_data[display]['IBM']],
                            keys=['IBM'],
                            axis=1)
        df_temp = df_temp.bfill()
        plt.plot(df_temp)
        plt.legend([display], loc='upper left')
        plt.xticks(rotation=45)
        plt.ylabel(display)
        plt.xlabel('Date')
        plt.grid()
        plt.title(display)

    plt.show()
    return norm_portvals, norm_benchvals
def testPolicy(self,
               symbols=['JPM'],
               sd=dt.datetime(2010, 1, 1),
               ed=dt.datetime(2011, 12, 31),
               sv=100000):
    """Manual strategy: build daily positions and convert them to orders.

    Starting ``lookback + 1`` rows into the price data, each day records a
    position for the previous date, computed by ``self.check_value`` from
    that previous row of the ``sma``, ``bbp`` and ``so`` indicators. The
    first processed day also records a zero position for itself, and a final
    row is added for the last day when its date equals ``ed``.

    :param symbols: list of tickers; only ``symbols[0]`` is traded. The list
        is never mutated.
    :param sd: start of the date range.
    :param ed: end of the date range.
    :param sv: starting value; not used by this method.
    :return: ``self.get_order(...)`` output indexed by ``Date`` with its
        first row dropped.
    """
    sma, bbp, so, lookback, price = get_indicators(symbols, sd, ed)

    # Collect rows in a list and build the frame once: appending to a
    # DataFrame inside the loop copies it every time, and DataFrame.append
    # no longer exists in pandas 2.0.
    rows = []
    prev = None
    for day in range(lookback + 1, price.shape[0]):
        date = price.index[day]
        if prev is None:
            # First processed day: start from a flat position.
            rows.append({'Date': date, 'Position': 0})
            prev = date
        rows.append({
            'Date': prev,
            'Position': self.check_value(sma.iloc[day - 1, 0],
                                         bbp.iloc[day - 1, 0],
                                         so.iloc[day - 1, 5]),
        })
        prev = date
        if date == ed:
            # The loop is always one day behind, so emit the last day too.
            rows.append({
                'Date': date,
                'Position': self.check_value(sma.iloc[day, 0],
                                             bbp.iloc[day, 0],
                                             so.iloc[day, 5]),
            })
    positions = pd.DataFrame(rows, columns=['Date', 'Position'])

    holding_orders = self.get_order(positions, symbols[0])
    holding_orders = holding_orders.set_index('Date')
    return holding_orders[1:]
def _forward_period_return(price, holddays):
    """Return each row's fractional return ``holddays`` rows ahead (last rows 0)."""
    if holddays < 1:
        raise ValueError("holddays must be at least 1")
    period_return = price.copy()
    # .iloc replaces the removed .ix; these slices were always positional.
    period_return.iloc[:-holddays, :] = (
        price.iloc[holddays:, :].values / price.iloc[:-holddays, :] - 1)
    # The last rows have no future price to compare against.
    period_return.iloc[-holddays:, :] = 0
    return period_return


def _label_period_return(period_return, up_threshold, down_threshold):
    """Label returns +1 / -1 / 0 against the thresholds and drop the SPY column."""
    labels = pd.DataFrame(data=0,
                          index=period_return.index,
                          columns=period_return.columns)
    labels[period_return >= up_threshold] = 1
    labels[period_return <= down_threshold] = -1
    del labels["SPY"]
    return labels


def _stack_features_and_labels(features, labels):
    """Stack features and labels column-wise, drop NaN rows, return (X, Y, dates)."""
    stacked = np.column_stack(tuple(features) + (labels,))
    frame = pd.DataFrame(stacked, index=labels.index).dropna()
    values = np.array(frame)
    return values[:, :-1], values[:, -1], frame.index


def normalize_train_test(symbols=["AAPL"],
                         train_dates=pd.date_range("2008-01-01", "2009-12-31"),
                         test_dates=pd.date_range("2010-01-01", "2011-12-31"),
                         sma_window=20,
                         momentum_window=7,
                         MFI_window=14,
                         expected_up=1,
                         expected_down=1,
                         holddays=21):
    """Build normalised indicator features and return labels for train/test.

    Features, in column order: momentum, Bollinger value, price/SMA ratio,
    MFI of the symbols, and MFI of SPY. Training features are normalised by
    ``normalize_indicator``; test features reuse the mean and standard
    deviation it returned for the training data. Labels are +1 / -1 / 0
    depending on whether the ``holddays``-ahead return reaches the median
    positive / negative training return scaled by ``expected_up`` /
    ``expected_down``.

    :param symbols: tickers to use; the list is never mutated.
    :param train_dates: dates of the training window.
    :param test_dates: dates of the test window.
    :param sma_window: window passed to ``get_indicators``.
    :param momentum_window: window passed to ``get_indicators``.
    :param MFI_window: window passed to ``get_indicators``.
    :param expected_up: multiplier on the median positive training return.
    :param expected_down: multiplier on the median negative training return.
    :param holddays: holding period, in rows, for the forward return.
    :return: ``trainX, trainY, trainDate, testX, testY, testDate``; rows
        containing NaN are dropped from each set.
    :raises ValueError: if ``holddays`` is smaller than 1.
    """
    (price, sma, price_sma_ratio, bollinger_value, bb_upper, bb_lower, mfi,
     momentum) = get_indicators(symbols, train_dates, sma_window, MFI_window,
                                momentum_window)

    norm_mfi, nmfim, nmfisd = normalize_indicator(mfi[symbols])
    norm_bbp, nbbpm, nbbpsd = normalize_indicator(bollinger_value[symbols])
    norm_psr, npsrm, npsrsd = normalize_indicator(price_sma_ratio[symbols])
    norm_spymfi, nspymfim, nspymfisd = normalize_indicator(mfi["SPY"])
    norm_momentum, nmomentumm, nmomentumsd = normalize_indicator(
        momentum[symbols])

    # Label thresholds come from the training window only and are reused
    # unchanged for the test window.
    period_return = _forward_period_return(price, holddays)
    gains = period_return[period_return > 0]
    losses = period_return[period_return < 0]
    up_threshold = gains[symbols].median() * expected_up
    down_threshold = losses[symbols].median() * expected_down

    labelY = _label_period_return(period_return, up_threshold, down_threshold)
    trainX, trainY, trainDate = _stack_features_and_labels(
        (norm_momentum, norm_bbp, norm_psr, norm_mfi, norm_spymfi), labelY)

    # momentum_window is passed here as well; omitting it made the test
    # momentum use get_indicators' default window instead of the training one.
    (testprice, testsma, testprice_sma_ratio, testbollinger_value,
     testbb_upper, testbb_lower, testmfi, testmomentum) = get_indicators(
         symbols, test_dates, sma_window, MFI_window, momentum_window)

    norm_testmfi = (testmfi[symbols] - nmfim[symbols]) / nmfisd[symbols]
    norm_testbbp = ((testbollinger_value[symbols] - nbbpm[symbols]) /
                    nbbpsd[symbols])
    norm_testpsr = ((testprice_sma_ratio[symbols] - npsrm[symbols]) /
                    npsrsd[symbols])
    norm_testspymfi = (testmfi["SPY"] - nspymfim) / nspymfisd
    norm_testmomentum = (testmomentum[symbols] - nmomentumm) / nmomentumsd

    testperiod_return = _forward_period_return(testprice, holddays)
    testlabelY = _label_period_return(testperiod_return, up_threshold,
                                      down_threshold)
    testX, testY, testDate = _stack_features_and_labels(
        (norm_testmomentum, norm_testbbp, norm_testpsr, norm_testmfi,
         norm_testspymfi), testlabelY)

    return trainX, trainY, trainDate, testX, testY, testDate
def addEvidence(self,
                symbol="IBM",
                sd=dt.datetime(2008, 1, 1),
                ed=dt.datetime(2009, 1, 1),
                sv=10000):
    """Train the Q-learner on in-sample data for ``symbol``.

    The three indicators (``stdev_divergence``, ``boll_bandr``,
    ``simple_moving_averager``) are cut into ``self.n_bins`` quantile bins.
    The bin edges are stored on ``self`` (``*_bins`` attributes) and each
    day's three bin numbers are concatenated as digits to form the integer
    state. The learner is then trained by replaying the period until two
    consecutive passes produce identical trades, bounded by
    ``self.min_iter`` and ``self.max_iter``.

    :param symbol: ticker to train on.
    :param sd: start of the in-sample date range.
    :param ed: end of the in-sample date range.
    :param sv: starting value; not used by this method.
    :raises Exception: if the learner returns an action other than 0, 1, 2.

    Sets ``self.learner`` and the three ``self.*_bins`` attributes; returns
    nothing.
    """
    # Grab in-sample data
    prices, trading_days = self.get_historical_data(symbol,
                                                    pd.date_range(sd, ed))
    daily_returns = get_daily_returns(prices)

    # Indicators
    indicators = get_indicators(prices.to_frame(symbol))
    boll_bandr = indicators["boll_bandr"]
    simple_moving_averager = indicators["simple_moving_averager"]
    stdev_divergence = indicators["stdev_divergence"]

    def to_bin_series(series, bins):
        # Map each value to its bin number, indexed like the indicators.
        return pd.Series(self.discretize(series, bins, self.n_bins),
                         index=indicators.index)

    # Discretize: learn quantile bin edges here and keep them on self.
    _, self.stdev_divergence_bins = pd.qcut(stdev_divergence,
                                            self.n_bins,
                                            retbins=True,
                                            labels=False)
    stdev_divergence_ind = to_bin_series(stdev_divergence,
                                         self.stdev_divergence_bins)

    _, self.boll_bandr_bins = pd.qcut(boll_bandr,
                                      self.n_bins,
                                      retbins=True,
                                      labels=False)
    boll_bandr_ind = to_bin_series(boll_bandr, self.boll_bandr_bins)

    _, self.simple_moving_averager_bins = pd.qcut(simple_moving_averager,
                                                  self.n_bins,
                                                  retbins=True,
                                                  labels=False)
    simple_moving_averager_ind = to_bin_series(
        simple_moving_averager, self.simple_moving_averager_bins)

    # Compute states of in-sample data: the three bin numbers are joined as
    # digits ("2", "0", "4" -> 204).
    discretized_indicators = pd.DataFrame(index=indicators.index)
    discretized_indicators["stdev_divergence"] = stdev_divergence_ind.values
    discretized_indicators["boll_bandr"] = boll_bandr_ind.values
    discretized_indicators[
        "simple_moving_averager"] = simple_moving_averager_ind.values
    discretized_indicators["mapping"] = (
        stdev_divergence_ind.astype(str) + boll_bandr_ind.astype(str) +
        simple_moving_averager_ind.astype(str))
    # Builtin int: the np.int alias was removed in NumPy 1.24.
    discretized_indicators["state"] = discretized_indicators[
        "mapping"].astype(int)
    states = discretized_indicators["state"]

    # QLearner
    self.learner = ql.QLearner(num_states=self.num_states,
                               num_actions=3,
                               alpha=0.2,
                               gamma=0.9,
                               rar=0.5,
                               radr=0.75,
                               dyna=0,
                               verbose=self.verbose,
                               seed=self.seed)

    # Trade needed to reach the target position from the current holding.
    short_trade = {-1000: 0, 0: -1000, 1000: -2000}
    long_trade = {-1000: 2000, 0: 1000, 1000: 0}

    # Training loop
    i = 0
    converged = False
    df_trades_previous = None
    while (i <= self.min_iter) or (i <= self.max_iter and not converged):
        # Set state with indicators of this first day
        action = self.learner.querysetstate(states.iloc[0])
        holding = 0
        # Explicit dtype keeps the NaN-filled float series on every pandas
        # version.
        df_trades = pd.Series(index=states.index, dtype=float)

        # .items() replaces Series.iteritems(), removed in pandas 2.0.
        for day, state in states.items():
            reward = holding * daily_returns.loc[day]
            if action != 2:  # LONG or SHORT: charge market impact
                reward *= (1 - self.impact)
            action = self.learner.query(state, reward)

            if action == 0:  # SHORT
                df_trades.loc[day] = short_trade.get(holding)
            elif action == 1:  # LONG
                df_trades.loc[day] = long_trade.get(holding)
            elif action == 2:  # DO NOTHING
                df_trades.loc[day] = 0
            else:
                raise Exception(
                    "Unknown trading action to take: {}".format(action))
            holding += df_trades.loc[day]

        # Check for convergence: identical trades on two consecutive passes.
        if (df_trades_previous is not None) and (
                df_trades.equals(df_trades_previous)):
            converged = True
        df_trades_previous = df_trades
        i += 1
def testPolicy(self,
               symbol="IBM",
               sd=dt.datetime(2009, 1, 1),
               ed=dt.datetime(2010, 1, 1),
               sv=10000):
    """Apply the trained learner to out-of-sample data and return its trades.

    The indicators are discretised with the bin edges stored on ``self``
    (``stdev_divergence_bins``, ``boll_bandr_bins``,
    ``simple_moving_averager_bins``) and the learner in ``self.learner`` is
    queried without random exploration, so those attributes must already be
    set when this method is called.

    :param symbol: ticker to trade.
    :param sd: start of the out-of-sample date range.
    :param ed: end of the out-of-sample date range.
    :param sv: starting value; not used by this method.
    :return: one-column DataFrame named ``symbol`` holding the share delta
        traded on each day.
    :raises Exception: if the learner returns an action other than 0, 1, 2.
    """
    # Grab out-of-sample data
    prices, trading_days = self.get_historical_data(symbol,
                                                    pd.date_range(sd, ed))

    # Indicators
    indicators = get_indicators(prices.to_frame(symbol))
    stdev_divergence = indicators["stdev_divergence"]
    boll_bandr = indicators["boll_bandr"]
    simple_moving_averager = indicators["simple_moving_averager"]

    def to_bin_series(series, bins):
        # Map each value to its bin number, indexed like the indicators.
        return pd.Series(self.discretize(series, bins, self.n_bins),
                         index=indicators.index)

    # Discretize with the stored bin edges.
    stdev_divergence_ind = to_bin_series(stdev_divergence,
                                         self.stdev_divergence_bins)
    boll_bandr_ind = to_bin_series(boll_bandr, self.boll_bandr_bins)
    simple_moving_averager_ind = to_bin_series(
        simple_moving_averager, self.simple_moving_averager_bins)

    # Compute states of out-of-sample data: the three bin numbers are joined
    # as digits ("2", "0", "4" -> 204).
    discretized_indicators = pd.DataFrame(index=indicators.index)
    discretized_indicators["stdev_divergence"] = stdev_divergence_ind.values
    discretized_indicators["boll_bandr"] = boll_bandr_ind.values
    discretized_indicators[
        "simple_moving_averager"] = simple_moving_averager_ind.values
    discretized_indicators["mapping"] = (
        stdev_divergence_ind.astype(str) + boll_bandr_ind.astype(str) +
        simple_moving_averager_ind.astype(str))
    # Builtin int: the np.int alias was removed in NumPy 1.24.
    discretized_indicators["state"] = discretized_indicators[
        "mapping"].astype(int)
    states = discretized_indicators["state"]

    # Trade needed to reach the target position from the current holding.
    short_trade = {-1000: 0, 0: -1000, 1000: -2000}
    long_trade = {-1000: 2000, 0: 1000, 1000: 0}

    holding = 0
    # Explicit dtype keeps the NaN-filled float series on every pandas
    # version.
    df_trades = pd.Series(index=states.index, dtype=float)

    # .items() replaces Series.iteritems(), removed in pandas 2.0.
    for day, state in states.items():
        action = self.learner.querysetstate(state, random=False)

        if action == 0:  # SHORT
            df_trades.loc[day] = short_trade.get(holding)
        elif action == 1:  # LONG
            df_trades.loc[day] = long_trade.get(holding)
        elif action == 2:  # DO NOTHING
            df_trades.loc[day] = 0
        else:
            raise Exception(
                "Unknown trading action to take: {}".format(action))
        holding += df_trades.loc[day]

    return df_trades.to_frame(symbol)