def add_volume_indicators(data: pd.DataFrame) -> pd.DataFrame:
    """Attach the Chaikin money flow, MFI and OBV columns to ``data``.

    Parameters
    ----------
    data : pd.DataFrame
        Daily stock values. Must include the columns: open, high, low,
        close and volume. It should also be sorted in a descending manner.

    Returns
    -------
    pd.DataFrame
        The same dataframe object, with the columns ``chaikin``, ``mfi``
        and ``obv`` written into it.
    """
    high, low, close, volume = (
        data[name] for name in ('high', 'low', 'close', 'volume')
    )

    # Every indicator is computed from the input columns before any of the
    # new columns is written back.
    computed = {
        'chaikin': ChaikinMoneyFlowIndicator(
            high, low, close, volume).chaikin_money_flow(),
        'mfi': MFIIndicator(high, low, close, volume).money_flow_index(),
        'obv': OnBalanceVolumeIndicator(close, volume).on_balance_volume(),
    }
    for column, values in computed.items():
        data.loc[:, column] = values

    return data
def setUp(self):
    """Load the CSV fixture and build the MFI indicator under test."""
    frame = pd.read_csv(self._filename, sep=',')
    self._df = frame

    # Indicator keyword -> fixture column holding that series.
    column_for = {
        'high': 'High',
        'low': 'Low',
        'close': 'Close',
        'volume': 'Volume',
    }
    series = {keyword: frame[column] for keyword, column in column_for.items()}
    self._indicator = MFIIndicator(**series, n=14, fillna=False)
class TestMFIIndicator(unittest.TestCase):
    """Compare the MFI implementation against the ``MFI`` fixture column.

    https://school.stockcharts.com/doku.php?id=technical_indicators:money_flow_index_mfi
    """

    _filename = 'ta/tests/data/cs-mfi.csv'

    def setUp(self):
        frame = pd.read_csv(self._filename, sep=',')
        self._df = frame
        self._indicator = MFIIndicator(
            high=frame['High'],
            low=frame['Low'],
            close=frame['Close'],
            volume=frame['Volume'],
            n=14,
            fillna=False)

    def tearDown(self):
        del self._df

    def _assert_tail_matches(self, result, target):
        # Only the last rows are compared; series names are ignored.
        pd.testing.assert_series_equal(
            self._df[target].tail(), result.tail(), check_names=False)

    def test_mfi(self):
        # Class-based API.
        self._assert_tail_matches(self._indicator.money_flow_index(), 'MFI')

    def test_mfi2(self):
        # Function-based API, called with the same arguments.
        frame = self._df
        result = money_flow_index(
            high=frame['High'],
            low=frame['Low'],
            close=frame['Close'],
            volume=frame['Volume'],
            n=14,
            fillna=False)
        self._assert_tail_matches(result, 'MFI')
def setUpClass(cls):
    """Read the fixture once and share frame, parameters and indicator on ``cls``."""
    frame = pd.read_csv(cls._filename, sep=',')
    cls._df = frame
    # Stored on the class as well as being used to build the indicator.
    cls._params = {
        'high': frame['High'],
        'low': frame['Low'],
        'close': frame['Close'],
        'volume': frame['Volume'],
        'n': 14,
        'fillna': False,
    }
    cls._indicator = MFIIndicator(**cls._params)
def add_volume_ta(df: pd.DataFrame, high: str, low: str, close: str,
                  volume: str, fillna: bool = False,
                  colprefix: str = "") -> pd.DataFrame:
    """Append the volume-based technical analysis columns to ``df``.

    Args:
        df (pandas.core.frame.DataFrame): Dataframe base; new columns are
            written into it.
        high (str): Name of 'high' column.
        low (str): Name of 'low' column.
        close (str): Name of 'close' column.
        volume (str): Name of 'volume' column.
        fillna(bool): if True, fill nan values.
        colprefix(str): Prefix put in front of every inserted column name.

    Returns:
        pandas.core.frame.DataFrame: The same dataframe with new features.
    """

    def _hlcv():
        # Columns are looked up afresh for every indicator.
        return dict(high=df[high], low=df[low],
                    close=df[close], volume=df[volume])

    def _cv():
        return dict(close=df[close], volume=df[volume])

    # Accumulation Distribution Index
    adi = AccDistIndexIndicator(**_hlcv(), fillna=fillna)
    df[f'{colprefix}volume_adi'] = adi.acc_dist_index()

    # On Balance Volume
    obv = OnBalanceVolumeIndicator(**_cv(), fillna=fillna)
    df[f'{colprefix}volume_obv'] = obv.on_balance_volume()

    # Chaikin Money Flow
    cmf = ChaikinMoneyFlowIndicator(**_hlcv(), fillna=fillna)
    df[f'{colprefix}volume_cmf'] = cmf.chaikin_money_flow()

    # Force Index
    force = ForceIndexIndicator(**_cv(), n=13, fillna=fillna)
    df[f'{colprefix}volume_fi'] = force.force_index()

    # Money Flow Indicator
    mfi = MFIIndicator(**_hlcv(), n=14, fillna=fillna)
    df[f'{colprefix}volume_mfi'] = mfi.money_flow_index()

    # Ease of Movement (one indicator object feeds two columns)
    eom = EaseOfMovementIndicator(
        high=df[high], low=df[low], volume=df[volume], n=14, fillna=fillna)
    df[f'{colprefix}volume_em'] = eom.ease_of_movement()
    df[f'{colprefix}volume_sma_em'] = eom.sma_ease_of_movement()

    # Volume Price Trend
    vpt = VolumePriceTrendIndicator(**_cv(), fillna=fillna)
    df[f'{colprefix}volume_vpt'] = vpt.volume_price_trend()

    # Negative Volume Index
    nvi = NegativeVolumeIndexIndicator(**_cv(), fillna=fillna)
    df[f'{colprefix}volume_nvi'] = nvi.negative_volume_index()

    # Volume Weighted Average Price
    vwap = VolumeWeightedAveragePrice(**_hlcv(), n=14, fillna=fillna)
    df[f'{colprefix}volume_vwap'] = vwap.volume_weighted_average_price()

    return df
def setUpClass(cls):
    """Read the fixture once and share frame, parameters and indicator on ``cls``."""
    frame = pd.read_csv(cls._filename, sep=",")
    cls._df = frame

    # Price/volume series first, then the scalar settings.
    params = {
        keyword: frame[keyword.capitalize()]
        for keyword in ("high", "low", "close", "volume")
    }
    params["window"] = 14
    params["fillna"] = False

    cls._params = params
    cls._indicator = MFIIndicator(**cls._params)
def action(self, indicator):
    """Write MFI-crossing buy/sell signals into ``indicator``.

    Reads the 'h', 'l', 'c' and 'v' entries and adds 'volume_mfi',
    'volume_mfi_prev', 'sell', 'buy' and 'action'. The thresholds come from
    ``self.parameters['mfi80']`` and ``self.parameters['mfi20']``. The
    visible code does not return a value.
    """
    # action: 1 means buy, -1 means sell, 0 means do nothing
    indicator['volume_mfi'] = MFIIndicator(
        high=indicator['h'],
        low=indicator['l'],
        close=indicator['c'],
        volume=indicator['v'],
    ).money_flow_index()
    indicator['volume_mfi_prev'] = indicator['volume_mfi'].shift(1)

    current = indicator['volume_mfi']
    previous = indicator['volume_mfi_prev']

    # If 80 -> 79: Sell
    upper = self.parameters['mfi80']
    fell_below_upper = (previous >= upper) & (current < upper)
    indicator['sell'] = fell_below_upper.astype(int)

    # If 19 -> 20: Buy
    lower = self.parameters['mfi20']
    rose_to_lower = (previous < lower) & (current >= lower)
    indicator['buy'] = rose_to_lower.astype(int)

    indicator['action'] = indicator['buy'] - indicator['sell']
def action(self, indicator):
    """Derive the buy/sell action columns from Money Flow Index crossings.

    Adds "volume_mfi", "volume_mfi_prev", "sell", "buy" and "action" to
    ``indicator``, using its "h", "l", "c" and "v" entries as input and the
    "mfi80" / "mfi20" entries of ``self.parameters`` as thresholds. The
    visible code does not return a value.
    """

    def crossed_below(level):
        # Previous value at or above the level, current value under it.
        return ((indicator["volume_mfi_prev"] >= level)
                & (indicator["volume_mfi"] < level))

    def crossed_above(level):
        # Previous value under the level, current value at or above it.
        return ((indicator["volume_mfi_prev"] < level)
                & (indicator["volume_mfi"] >= level))

    # action: 1 means buy, -1 means sell, 0 means do nothing
    mfi = MFIIndicator(
        high=indicator["h"],
        low=indicator["l"],
        close=indicator["c"],
        volume=indicator["v"],
    )
    indicator["volume_mfi"] = mfi.money_flow_index()
    indicator["volume_mfi_prev"] = indicator["volume_mfi"].shift(1)

    # If 80 -> 79: Sell, If 19 -> 20: Buy
    indicator["sell"] = crossed_below(self.parameters["mfi80"]).astype(int)
    indicator["buy"] = crossed_above(self.parameters["mfi20"]).astype(int)
    indicator["action"] = indicator["buy"] - indicator["sell"]
def applyIndicator(self, full_company_price):
    """Add the technical-indicator feature columns to a price dataframe.

    The frame is stored on ``self.data`` and the new columns are written
    into that same object, so the caller's dataframe is modified in place.

    :param full_company_price: dataframe with 'high', 'low', 'close' and
        'volume' columns.
    :return: ``self.data`` with the indicator columns added.
    """
    self.data = full_company_price
    high = self.data['high']
    low = self.data['low']
    close = self.data['close']
    volume = self.data['volume']

    # Trend / momentum indicators built from the close price only.
    EMA12 = EMAIndicator(close, 12, fillna=False)
    # Fixed: this was built with a window of 20 although both the variable
    # and the output column are named "30", like ROC30 below.
    EMA30 = EMAIndicator(close, 30, fillna=False)
    EMA60 = EMAIndicator(close, 60, fillna=False)
    MACD1226 = MACD(close, 26, 12, 9, fillna=False)
    MACD2452 = MACD(close, 52, 24, 18, fillna=False)
    ROC12 = ROCIndicator(close, 12, fillna=False)
    ROC30 = ROCIndicator(close, 30, fillna=False)
    ROC60 = ROCIndicator(close, 60, fillna=False)
    RSI14 = RSIIndicator(close, 14, fillna=False)
    RSI28 = RSIIndicator(close, 28, fillna=False)
    RSI60 = RSIIndicator(close, 60, fillna=False)
    # NOTE(review): AroonIndicator is called with close only; the signature
    # may differ between ta releases - confirm against the pinned version.
    AROON25 = AroonIndicator(close, 25, fillna=False)
    AROON50 = AroonIndicator(close, 50, fillna=False)
    AROON80 = AroonIndicator(close, 80, fillna=False)

    # Indicators that also need high/low and, for some, volume.
    MFI14 = MFIIndicator(high, low, close, volume, 14, fillna=False)
    MFI28 = MFIIndicator(high, low, close, volume, 28, fillna=False)
    MFI80 = MFIIndicator(high, low, close, volume, 80, fillna=False)
    CCI20 = CCIIndicator(high, low, close, 20, 0.015, fillna=False)
    CCI40 = CCIIndicator(high, low, close, 40, 0.015, fillna=False)
    CCI100 = CCIIndicator(high, low, close, 100, 0.015, fillna=False)
    WILLR14 = WilliamsRIndicator(high, low, close, 14, fillna=False)
    WILLR28 = WilliamsRIndicator(high, low, close, 28, fillna=False)
    WILLR60 = WilliamsRIndicator(high, low, close, 60, fillna=False)
    BBANDS20 = BollingerBands(close, 20, 2, fillna=False)
    KC20 = KeltnerChannel(high, low, close, 20, 10, fillna=False)
    STOCH14 = StochasticOscillator(high, low, close, 14, 3, fillna=False)
    STOCH28 = StochasticOscillator(high, low, close, 28, 6, fillna=False)
    STOCH60 = StochasticOscillator(high, low, close, 60, 12, fillna=False)
    CMI20 = ChaikinMoneyFlowIndicator(high, low, close, volume, 20,
                                      fillna=False)
    CMI40 = ChaikinMoneyFlowIndicator(high, low, close, volume, 40,
                                      fillna=False)
    CMI100 = ChaikinMoneyFlowIndicator(high, low, close, volume, 100,
                                       fillna=False)

    # Price-level indicators are stored relative to the close price.
    self.data['ema12'] = (close - EMA12.ema_indicator()) / close
    self.data['ema30'] = (close - EMA30.ema_indicator()) / close
    self.data['ema60'] = (close - EMA60.ema_indicator()) / close
    # MACD line minus its signal line.
    self.data['macd1226'] = MACD1226.macd() - MACD1226.macd_signal()
    self.data['macd2452'] = MACD2452.macd() - MACD2452.macd_signal()
    self.data['roc12'] = ROC12.roc()
    self.data['roc30'] = ROC30.roc()
    self.data['roc60'] = ROC60.roc()
    self.data['rsi14'] = RSI14.rsi()
    self.data['rsi28'] = RSI28.rsi()
    self.data['rsi60'] = RSI60.rsi()
    self.data['aroon25'] = AROON25.aroon_indicator()
    self.data['aroon50'] = AROON50.aroon_indicator()
    self.data['aroon80'] = AROON80.aroon_indicator()
    self.data['mfi14'] = MFI14.money_flow_index()
    self.data['mfi28'] = MFI28.money_flow_index()
    self.data['mfi80'] = MFI80.money_flow_index()
    self.data['cci20'] = CCI20.cci()
    self.data['cci40'] = CCI40.cci()
    self.data['cci100'] = CCI100.cci()
    self.data['willr14'] = WILLR14.wr()
    self.data['willr28'] = WILLR28.wr()
    self.data['willr60'] = WILLR60.wr()
    # Band distances, again relative to the close price.
    self.data['bband20up'] = (BBANDS20.bollinger_hband() - close) / close
    self.data['bband20down'] = (close - BBANDS20.bollinger_lband()) / close
    self.data['stoch14'] = STOCH14.stoch()
    self.data['stoch28'] = STOCH28.stoch()
    self.data['stoch60'] = STOCH60.stoch()
    self.data['cmi20'] = CMI20.chaikin_money_flow()
    self.data['cmi40'] = CMI40.chaikin_money_flow()
    self.data['cmi100'] = CMI100.chaikin_money_flow()
    self.data['kc20up'] = (KC20.keltner_channel_hband() - close) / close
    self.data['kc20down'] = (close - KC20.keltner_channel_lband()) / close
    return self.data
def handle(self, *args, **options):
    """Compute technical indicators for every symbol and store them.

    Unless ``options['update']`` is truthy, all existing ``NSETechnical``
    rows are deleted first. For each symbol the historical rows are loaded
    in timestamp order and the indicators are computed over the whole
    series. The rows are then walked from newest to oldest, creating an
    ``NSETechnical`` record for each one until a row already flagged
    ``technicals`` is reached. New records are bulk-created and the
    processed rows are bulk-updated with ``technicals=True``.
    """
    if not options['update']:
        NSETechnical.objects.all().delete()

    # Each name is both a column of the working dataframe and the
    # NSETechnical keyword argument it is stored under.
    technical_columns = (
        'trend_macd',
        'trend_macd_signal',
        'trend_macd_diff',
        'trend_sma_fast',
        'trend_sma_slow',
        'trend_ema_fast',
        'trend_ema_slow',
        'trend_ichimoku_conv',
        'trend_ichimoku_base',
        'trend_ichimoku_a',
        'trend_ichimoku_b',
        'trend_visual_ichimoku_a',
        'trend_visual_ichimoku_b',
        'volatility_bbm',
        'volatility_bbh',
        'volatility_bbl',
        'volatility_bbw',
        'volatility_bbp',
        'volatility_bbhi',
        'volatility_bbli',
        'volume_adi',
        'volume_mfi',
        'momentum_rsi',
        'momentum_stoch_rsi',
        'momentum_stoch_rsi_k',
        'momentum_stoch_rsi_d',
    )

    symbols = Symbol.objects.all()
    for symbol in symbols:
        nse_history_data = NSEHistoricalData.objects.filter(
            symbol__symbol_name=symbol).order_by('timestamp')
        if not nse_history_data:
            continue

        nse_technical = pd.DataFrame(
            list(
                nse_history_data.values('timestamp', 'open', 'high', 'low',
                                        'close', 'total_traded_quantity')))
        high = nse_technical['high']
        low = nse_technical['low']
        close = nse_technical['close']
        volume = nse_technical['total_traded_quantity']

        # Moving average convergence divergence
        indicator_macd = MACD(close=close,
                              window_slow=26,
                              window_fast=12,
                              window_sign=9,
                              fillna=False)
        nse_technical["trend_macd"] = indicator_macd.macd()
        nse_technical["trend_macd_signal"] = indicator_macd.macd_signal()
        nse_technical["trend_macd_diff"] = indicator_macd.macd_diff()

        # Simple Moving Average
        nse_technical["trend_sma_fast"] = SMAIndicator(
            close=close, window=12, fillna=False).sma_indicator()
        nse_technical["trend_sma_slow"] = SMAIndicator(
            close=close, window=26, fillna=False).sma_indicator()

        # Exponential Moving Average
        nse_technical["trend_ema_fast"] = EMAIndicator(
            close=close, window=12, fillna=False).ema_indicator()
        nse_technical["trend_ema_slow"] = EMAIndicator(
            close=close, window=26, fillna=False).ema_indicator()

        # Ichimoku Indicator
        indicator_ichi = IchimokuIndicator(
            high=high,
            low=low,
            window1=9,
            window2=26,
            window3=52,
            visual=False,
            fillna=False,
        )
        nse_technical["trend_ichimoku_conv"] = (
            indicator_ichi.ichimoku_conversion_line())
        nse_technical["trend_ichimoku_base"] = (
            indicator_ichi.ichimoku_base_line())
        nse_technical["trend_ichimoku_a"] = indicator_ichi.ichimoku_a()
        nse_technical["trend_ichimoku_b"] = indicator_ichi.ichimoku_b()

        # Same settings with visual=True for the "visual" columns.
        indicator_ichi_visual = IchimokuIndicator(
            high=high,
            low=low,
            window1=9,
            window2=26,
            window3=52,
            visual=True,
            fillna=False,
        )
        nse_technical["trend_visual_ichimoku_a"] = (
            indicator_ichi_visual.ichimoku_a())
        nse_technical["trend_visual_ichimoku_b"] = (
            indicator_ichi_visual.ichimoku_b())

        # Bollinger Band
        indicator_bb = BollingerBands(close=close,
                                      window=20,
                                      window_dev=2,
                                      fillna=False)
        nse_technical["volatility_bbm"] = indicator_bb.bollinger_mavg()
        nse_technical["volatility_bbh"] = indicator_bb.bollinger_hband()
        nse_technical["volatility_bbl"] = indicator_bb.bollinger_lband()
        nse_technical["volatility_bbw"] = indicator_bb.bollinger_wband()
        nse_technical["volatility_bbp"] = indicator_bb.bollinger_pband()
        nse_technical["volatility_bbhi"] = (
            indicator_bb.bollinger_hband_indicator())
        nse_technical["volatility_bbli"] = (
            indicator_bb.bollinger_lband_indicator())

        # Accumulation Distribution Index
        nse_technical["volume_adi"] = AccDistIndexIndicator(
            high=high, low=low, close=close, volume=volume,
            fillna=False).acc_dist_index()

        # Money Flow Index
        nse_technical["volume_mfi"] = MFIIndicator(
            high=high,
            low=low,
            close=close,
            volume=volume,
            window=14,
            fillna=False,
        ).money_flow_index()

        # Relative Strength Index (RSI)
        nse_technical["momentum_rsi"] = RSIIndicator(
            close=close, window=14, fillna=False).rsi()

        # Stoch RSI (StochRSI)
        indicator_srsi = StochRSIIndicator(close=close,
                                           window=14,
                                           smooth1=3,
                                           smooth2=3,
                                           fillna=False)
        nse_technical["momentum_stoch_rsi"] = indicator_srsi.stochrsi()
        nse_technical["momentum_stoch_rsi_k"] = indicator_srsi.stochrsi_k()
        nse_technical["momentum_stoch_rsi_d"] = indicator_srsi.stochrsi_d()

        # Infinities are mapped to NaN first, then NaN to None.
        # replace([inf, -inf], None) can pad-fill from the previous row
        # on older pandas releases instead of storing None.
        nse_technical = nse_technical.replace([np.inf, -np.inf], np.nan)
        nse_technical.replace({np.nan: None}, inplace=True)

        list_to_create = []
        list_to_update = []
        # Walk from the newest row backwards and stop at the first row
        # that already has technicals.
        for index in range(len(nse_history_data) - 1, -1, -1):
            data = nse_history_data[index]
            if data.technicals:
                break
            technical = NSETechnical(
                nse_historical_data=data,
                **{
                    column: nse_technical[column][index]
                    for column in technical_columns
                })
            data.technicals = True
            list_to_update.append(data)
            list_to_create.append(technical)

        NSETechnical.objects.bulk_create(list_to_create)
        NSEHistoricalData.objects.bulk_update(list_to_update,
                                              ['technicals'])
        print(f"Technicals updated for {symbol}")
def createDataset(self, symbol: str, startDate, endDate, useAllIndicators=True, isAugmenting=False, timePeriodForOutputs=24):
    """
    Creates a dataset and stores it in self.inputData / self.outputData.
    Please make sure that the start and end dates are the beginnings of
    days. The visible code does not return a value.

    :param symbol: e.g. "BTCUSDT"
    :param startDate: e.g. datetime(year=2020, month=1, day=1)
    :param endDate: e.g. datetime(year=2020, month=2, day=1)
    :param useAllIndicators: if False, only uses the minimum indicators
        (close, volume, rsi, ema and mfi).
    :param isAugmenting: used by createAugmentedDataset when augmenting.
    :param timePeriodForOutputs: if set to 24, this will generate the
        labels (percentiles) for the next 24 hours after the 15-day period
        that appears in the input.
    """
    # These are time-related variables.
    timezone = "Etc/GMT-0"
    timezone = pytz.timezone(timezone)
    # NOTE(review): outputStartDate is assigned but not used in the
    # visible code.
    outputStartDate = startDate
    # We need to go back a little earlier to generate indicators such as RSI.
    startDate -= timedelta(days=DAYS_IN_AN_INPUT + 60)
    endDate = timezone.localize(endDate)
    startDate = timezone.localize(startDate)
    # outputStartDate = timezone.localize(outputStartDate)

    # We will be collecting our final features and labels in here:
    self.inputData = []
    self.outputData = []

    # This dataframe has all the raw data we need to generate the dataset.
    df = self.dataObtainer.getHistoricalDataAsDataframe(symbol)

    # First, we will gather all of the means for our inputs...
    closeMeans = []
    volumeMeans = []

    # ... also, we will gather the outputs, which represent the
    # distributions of the next day prices.
    output15thPercentiles = []
    output25thPercentiles = []
    output35thPercentiles = []
    outputMedians = []
    output65thPercentiles = []
    output75thPercentiles = []
    output85thPercentiles = []

    # We will use this to normalize our outputs by dividing them by the
    # mean price of the last (latest/most recent) day in our input.
    priceMeansToDivideLabelsBy = []
    volumeMeansToDivideLabelsBy = []
    date = startDate

    # For augmentation:
    phaseShift = uniform(0, np.pi * 2)
    count = 0

    # Now we will be collecting the input prices, input volumes, and output
    # percentiles. Every path through one iteration appends exactly one
    # value to each of the eleven lists above, so they stay index-aligned.
    while date < endDate:
        print("Processing", date, "/", endDate)

        # First, we will collect the start and end dates for this input
        # point (which consists of 3 hours of data if that is our input
        # time interval). Then we calculate the mean price and volume for
        # this input data point.
        startIndex = df.index[df["Timestamp"] == date].tolist()

        # If this if condition is true, then we may be missing some data in
        # our dataset. I think this happens during times when Binance was
        # down. In this case, we just use the previous data.
        # NOTE(review): the [-1] lookups raise IndexError when the lists
        # are still empty, i.e. if the very first point is missing.
        if len(startIndex) == 0:
            date += self._dataTimeInterval
            closeMeans.append(closeMeans[-1])
            volumeMeans.append(volumeMeans[-1])
            outputMedians.append(outputMedians[-1])
            output15thPercentiles.append(output15thPercentiles[-1])
            output25thPercentiles.append(output25thPercentiles[-1])
            output35thPercentiles.append(output35thPercentiles[-1])
            output65thPercentiles.append(output65thPercentiles[-1])
            output75thPercentiles.append(output75thPercentiles[-1])
            output85thPercentiles.append(output85thPercentiles[-1])
            priceMeansToDivideLabelsBy.append(priceMeansToDivideLabelsBy[-1])
            volumeMeansToDivideLabelsBy.append(volumeMeansToDivideLabelsBy[-1])
            continue
        startIndex = startIndex[0]

        endIndex = df.index[df["Timestamp"] == date + self._dataTimeInterval].tolist()
        # Same carry-forward when the end of the interval is missing.
        if len(endIndex) == 0:
            date += self._dataTimeInterval
            closeMeans.append(closeMeans[-1])
            volumeMeans.append(volumeMeans[-1])
            outputMedians.append(outputMedians[-1])
            output15thPercentiles.append(output15thPercentiles[-1])
            output25thPercentiles.append(output25thPercentiles[-1])
            output35thPercentiles.append(output35thPercentiles[-1])
            output65thPercentiles.append(output65thPercentiles[-1])
            output75thPercentiles.append(output75thPercentiles[-1])
            output85thPercentiles.append(output85thPercentiles[-1])
            priceMeansToDivideLabelsBy.append(priceMeansToDivideLabelsBy[-1])
            volumeMeansToDivideLabelsBy.append(volumeMeansToDivideLabelsBy[-1])
            continue
        endIndex = endIndex[0]

        data = df.iloc[startIndex : endIndex]
        if isAugmenting:
            # Scale both means by a sine-shaped factor; its amplitude is
            # drawn from [0.02, 0.04] and its phase advances by a random
            # step each point, wrapping once it exceeds 2*pi.
            x = phaseShift + count
            augmentation = 1 + np.sin(x) * uniform(0.02, 0.04)
            closeMeans.append(data["Close"].mean() * augmentation)
            volumeMeans.append(data["Volume"].mean() * augmentation)
            count += uniform(0.3, 0.6)
            if count > 2 * np.pi:
                count = 0
        else:
            closeMeans.append(data["Close"].mean())
            volumeMeans.append(data["Volume"].mean())

        # Now we get the start and end dates for output data that would
        # be associated with an entry that begins at the data point found
        # above. Then we calculate the percentiles for the output.
        date2 = date + timedelta(days=DAYS_IN_AN_INPUT)
        startIndex = df.index[df["Timestamp"] == date2].tolist()
        # The input means were already appended above, so only the output
        # and normalization lists are carried forward from here on.
        if len(startIndex) == 0:
            date += self._dataTimeInterval
            outputMedians.append(outputMedians[-1])
            output15thPercentiles.append(output15thPercentiles[-1])
            output25thPercentiles.append(output25thPercentiles[-1])
            output35thPercentiles.append(output35thPercentiles[-1])
            output65thPercentiles.append(output65thPercentiles[-1])
            output75thPercentiles.append(output75thPercentiles[-1])
            output85thPercentiles.append(output85thPercentiles[-1])
            priceMeansToDivideLabelsBy.append(priceMeansToDivideLabelsBy[-1])
            volumeMeansToDivideLabelsBy.append(volumeMeansToDivideLabelsBy[-1])
            continue
        startIndex = startIndex[0]

        date2 += timedelta(hours=timePeriodForOutputs)
        endIndex = df.index[df["Timestamp"] == date2].tolist()
        if len(endIndex) == 0:
            date += self._dataTimeInterval
            outputMedians.append(outputMedians[-1])
            output15thPercentiles.append(output15thPercentiles[-1])
            output25thPercentiles.append(output25thPercentiles[-1])
            output35thPercentiles.append(output35thPercentiles[-1])
            output65thPercentiles.append(output65thPercentiles[-1])
            output75thPercentiles.append(output75thPercentiles[-1])
            output85thPercentiles.append(output85thPercentiles[-1])
            priceMeansToDivideLabelsBy.append(priceMeansToDivideLabelsBy[-1])
            volumeMeansToDivideLabelsBy.append(volumeMeansToDivideLabelsBy[-1])
            continue
        endIndex = endIndex[0]

        data = df.iloc[startIndex: endIndex]["Close"]
        outputMedians.append(data.median())
        output15thPercentiles.append(data.quantile(0.15))
        output25thPercentiles.append(data.quantile(0.25))
        output35thPercentiles.append(data.quantile(0.35))
        output65thPercentiles.append(data.quantile(0.65))
        output75thPercentiles.append(data.quantile(0.75))
        output85thPercentiles.append(data.quantile(0.85))

        # Lastly, we need to get the last input day's mean price, which we
        # use to normalize our output percentiles.
        date3 = date + timedelta(days=DAYS_IN_AN_INPUT - 1)
        startIndex = df.index[df["Timestamp"] == date3].tolist()
        # Only the two normalization lists are still missing an entry here.
        if len(startIndex) == 0:
            date += self._dataTimeInterval
            priceMeansToDivideLabelsBy.append(priceMeansToDivideLabelsBy[-1])
            volumeMeansToDivideLabelsBy.append(volumeMeansToDivideLabelsBy[-1])
            continue
        startIndex = startIndex[0]

        date3 = date + timedelta(days=DAYS_IN_AN_INPUT)
        endIndex = df.index[df["Timestamp"] == date3].tolist()
        if len(endIndex) == 0:
            date += self._dataTimeInterval
            priceMeansToDivideLabelsBy.append(priceMeansToDivideLabelsBy[-1])
            volumeMeansToDivideLabelsBy.append(volumeMeansToDivideLabelsBy[-1])
            continue
        endIndex = endIndex[0]

        data = df.iloc[startIndex: endIndex]
        priceMeansToDivideLabelsBy.append(data["Close"].mean())
        volumeMeansToDivideLabelsBy.append(data["Volume"].mean())
        date += self._dataTimeInterval

    # Now that our while loop above collected data for inputs and
    # outputs, we need to generate technical indicators as additional
    # input features. We seem to be getting good performance if we only
    # use close, volume, rsi, ema and mfi, but we also have some other
    # indicators to play around with, such as ma and an additional rsi
    # with a different parameter.
    stock = StockDataFrame({ "close": closeMeans, "volume": volumeMeans })

    # The standard RSI is 14 day. Note that if our time interval is 3 hrs,
    # there are 8 data points in a day. Thus, a 14 day RSI is a 112-RSI
    # because 14 * 8 = 112.
    rsis = (stock["rsi:112"] / 100).tolist()
    rsis2 = (stock["rsi:14"] / 100).tolist()
    emas = (stock["ema:21"]).tolist()
    macds = stock["macd:96,208"].tolist()
    macds2 = stock["macd:24,52"].tolist()
    bollUppers = stock["boll.upper:160"].tolist()
    bollLowers = stock["boll.lower:160"].tolist()

    # Only interval means are available here, so the close means are passed
    # as high, low and close alike.
    # NOTE(review): window=14 counts data points, not days, unlike the
    # 112-point RSI above - confirm this is intended.
    from ta.volume import MFIIndicator
    moneyFlowIndex = MFIIndicator(stock["close"], stock["close"], stock["close"], stock["volume"], window=14)
    mfis = (moneyFlowIndex.money_flow_index().divide(100)).to_list()

    # This gets rid of NANs in our indicators (just in case).
    import math
    rsis = [0 if math.isnan(x) else x for x in rsis]
    rsis2 = [0 if math.isnan(x) else x for x in rsis2]
    emas = [0 if math.isnan(x) else x for x in emas]
    macds = [0 if math.isnan(x) else x for x in macds]
    macds2 = [0 if math.isnan(x) else x for x in macds2]
    bollUppers = [0 if math.isnan(x) else x for x in bollUppers]
    bollLowers = [0 if math.isnan(x) else x for x in bollLowers]
    mfis = [0 if math.isnan(x) else x for x in mfis]

    # Now we will generate our final inputs and outputs! See the for loop
    # below.
    entryAmount = int((len(closeMeans) - self._numberOfSamples - 1))
    # With dayByDay set, consecutive entries start one day apart; otherwise
    # they start one data point apart.
    if self.dayByDay:
        advanceAmount = self._datapointsPerDay
    else:
        advanceAmount = 1

    def fixWithin0And1(x):
        # Clamp x into the closed range [0.0, 1.0].
        return min(max(x, 0.0), 1.0)

    # Entries start 60 days' worth of data points in, matching the extra 60
    # days subtracted from startDate above.
    for i in range(60 * self._datapointsPerDay, entryAmount, advanceAmount):
        print("Percent of entries created: " + str(i / entryAmount * 100) + "%")
        yesterdayCloseMean = priceMeansToDivideLabelsBy[i]
        yesterdayVolumeMean = volumeMeansToDivideLabelsBy[i]

        # This gets the input features and outputs for this dataset entry.
        close = closeMeans[i : i + self._numberOfSamples]
        volume = volumeMeans[i : i + self._numberOfSamples]
        rsi = rsis[i : i + self._numberOfSamples]
        rsi2 = rsis2[i: i + self._numberOfSamples]
        ema = emas[i: i + self._numberOfSamples]
        macd = macds[i: i + self._numberOfSamples]
        macd2 = macds2[i: i + self._numberOfSamples]
        # Price-level features are divided by twice the reference mean and
        # clamped to [0, 1]; MACD is additionally shifted by 0.5.
        ema = [fixWithin0And1(m / yesterdayCloseMean / 2) for m in ema]
        macd = [fixWithin0And1(m / yesterdayCloseMean / 2 + 0.5) for m in macd]
        macd2 = [fixWithin0And1(m / yesterdayCloseMean / 2 + 0.5) for m in macd2]
        mfi = mfis[i: i + self._numberOfSamples]
        bollUpper = bollUppers[i: i + self._numberOfSamples]
        bollUpper = [fixWithin0And1(m / yesterdayCloseMean / 2) for m in bollUpper]
        bollLower = bollLowers[i: i + self._numberOfSamples]
        bollLower = [fixWithin0And1(m / yesterdayCloseMean / 2) for m in bollLower]
        for j in range(len(close)):
            close[j] = fixWithin0And1(close[j] / yesterdayCloseMean / 2)
        for j in range(len(volume)):
            volume[j] = fixWithin0And1(volume[j] / yesterdayVolumeMean / 2)

        # Finally, we add the entry to the dataset.
        if useAllIndicators:
            self.inputData.append([close, volume, rsi, rsi2, ema, macd, macd2, bollUpper, bollLower, mfi])
        else:
            self.inputData.append([close, volume, rsi, ema, mfi])

        # This normalizes our data. 0.5 means that the percentile is the same
        # as the last day's mean. 1.0 means that the percentile is twice the
        # value of the last day's mean. We normalize in this way so that we
        # can use the sigmoid activation function for the outputs, which
        # produces values between 0 and 1.
        # NOTE(review): unlike the inputs, these labels are not clamped.
        output15thPercentile = output15thPercentiles[i] / yesterdayCloseMean / 2
        output25thPercentile = output25thPercentiles[i] / yesterdayCloseMean / 2
        output35thPercentile = output35thPercentiles[i] / yesterdayCloseMean / 2
        outputMedian = outputMedians[i] / yesterdayCloseMean / 2
        output65thPercentile = output65thPercentiles[i] / yesterdayCloseMean / 2
        output75thPercentile = output75thPercentiles[i] / yesterdayCloseMean / 2
        output85thPercentile = output85thPercentiles[i] / yesterdayCloseMean / 2
        self.outputData.append([ output15thPercentile, output25thPercentile, output35thPercentile, outputMedian, output65thPercentile, output75thPercentile, output85thPercentile ])