예제 #1
0
파일: volume.py 프로젝트: stikhun/ta
class TestMFIIndicator(unittest.TestCase):
    """
    Checks the Money Flow Index against the reference 'MFI' column of the
    CSV fixture, through both the class API and the functional wrapper.

    https://school.stockcharts.com/doku.php?id=technical_indicators:money_flow_index_mfi
    """

    _filename = 'ta/tests/data/cs-mfi.csv'

    def setUp(self):
        # Load the fixture once per test and build the indicator from its
        # price/volume columns.
        frame = pd.read_csv(self._filename, sep=',')
        self._df = frame
        self._indicator = MFIIndicator(
            high=frame['High'],
            low=frame['Low'],
            close=frame['Close'],
            volume=frame['Volume'],
            n=14,
            fillna=False,
        )

    def tearDown(self):
        # Drop the fixture so every test starts from a fresh read.
        del self._df

    def test_mfi(self):
        # Class-based API: only the last rows are compared.
        expected = self._df['MFI'].tail()
        actual = self._indicator.money_flow_index().tail()
        pd.testing.assert_series_equal(expected, actual, check_names=False)

    def test_mfi2(self):
        # Functional wrapper must agree with the same reference column.
        frame = self._df
        computed = money_flow_index(
            high=frame['High'],
            low=frame['Low'],
            close=frame['Close'],
            volume=frame['Volume'],
            n=14,
            fillna=False,
        )
        expected = frame['MFI'].tail()
        pd.testing.assert_series_equal(expected, computed.tail(), check_names=False)
예제 #2
0
def add_volume_indicators(data: pd.DataFrame) -> pd.DataFrame:
    """Append Chaikin Money Flow, MFI and OBV columns to ``data``.

    The frame is modified in place and also returned, so the caller's
    object gains the columns ``chaikin``, ``mfi`` and ``obv``.

    Parameters
    ----------
    data : pd.DataFrame
        A dataframe with daily stock values. Must include: open, high,
        low, close and volume (only high, low, close and volume are read
        here). It should also be sorted in a descending manner.

    Returns
    -------
    pd.DataFrame
        The input dataframe with the indicators added.
    """
    high, low = data['high'], data['low']
    close, volume = data['close'], data['volume']

    # Build every indicator first (library default windows), then compute.
    cmf_indicator = ChaikinMoneyFlowIndicator(high, low, close, volume)
    mfi_indicator = MFIIndicator(high, low, close, volume)
    obv_indicator = OnBalanceVolumeIndicator(close, volume)

    computed_columns = (
        ('chaikin', cmf_indicator.chaikin_money_flow),
        ('mfi', mfi_indicator.money_flow_index),
        ('obv', obv_indicator.on_balance_volume),
    )
    for column_name, compute in computed_columns:
        data.loc[:, column_name] = compute()

    return data
예제 #3
0
    def applyIndicator(self, full_company_price):
        """Compute the technical-indicator feature columns for a price frame.

        The frame is stored on ``self.data`` (no copy is made, so the
        caller's object is modified in place) and one column per indicator
        is added to it.

        :param full_company_price: frame-like object exposing the columns
            'high', 'low', 'close' and 'volume'.
        :return: the same object, with the indicator columns added.
        """
        self.data = full_company_price

        high = self.data['high']
        low = self.data['low']
        close = self.data['close']
        volume = self.data['volume']

        # Trend / momentum indicators over short, medium and long windows.
        EMA12 = EMAIndicator(close, 12, fillna=False)
        # Fixed: this was built with a window of 20 although it is named
        # EMA30 and stored in the 'ema30' column (compare ROC30 below).
        EMA30 = EMAIndicator(close, 30, fillna=False)
        EMA60 = EMAIndicator(close, 60, fillna=False)
        MACD1226 = MACD(close, 26, 12, 9, fillna=False)
        MACD2452 = MACD(close, 52, 24, 18, fillna=False)
        ROC12 = ROCIndicator(close, 12, fillna=False)
        ROC30 = ROCIndicator(close, 30, fillna=False)
        ROC60 = ROCIndicator(close, 60, fillna=False)
        RSI14 = RSIIndicator(close, 14, fillna=False)
        RSI28 = RSIIndicator(close, 28, fillna=False)
        RSI60 = RSIIndicator(close, 60, fillna=False)
        AROON25 = AroonIndicator(close, 25, fillna=False)
        AROON50 = AroonIndicator(close, 50, fillna=False)
        AROON80 = AroonIndicator(close, 80, fillna=False)

        # Volume-weighted and range-based oscillators.
        MFI14 = MFIIndicator(high, low, close, volume, 14, fillna=False)
        MFI28 = MFIIndicator(high, low, close, volume, 28, fillna=False)
        MFI80 = MFIIndicator(high, low, close, volume, 80, fillna=False)
        CCI20 = CCIIndicator(high, low, close, 20, 0.015, fillna=False)
        CCI40 = CCIIndicator(high, low, close, 40, 0.015, fillna=False)
        CCI100 = CCIIndicator(high, low, close, 100, 0.015, fillna=False)
        WILLR14 = WilliamsRIndicator(high, low, close, 14, fillna=False)
        WILLR28 = WilliamsRIndicator(high, low, close, 28, fillna=False)
        WILLR60 = WilliamsRIndicator(high, low, close, 60, fillna=False)

        # Volatility bands and stochastic oscillators.
        BBANDS20 = BollingerBands(close, 20, 2, fillna=False)
        KC20 = KeltnerChannel(high, low, close, 20, 10, fillna=False)
        STOCH14 = StochasticOscillator(high, low, close, 14, 3, fillna=False)
        STOCH28 = StochasticOscillator(high, low, close, 28, 6, fillna=False)
        STOCH60 = StochasticOscillator(high, low, close, 60, 12, fillna=False)

        # Chaikin Money Flow over three windows.
        CMI20 = ChaikinMoneyFlowIndicator(high, low, close, volume, 20,
                                          fillna=False)
        CMI40 = ChaikinMoneyFlowIndicator(high, low, close, volume, 40,
                                          fillna=False)
        CMI100 = ChaikinMoneyFlowIndicator(high, low, close, volume, 100,
                                           fillna=False)

        # EMAs are stored as the relative distance of close from the average.
        self.data['ema12'] = (close - EMA12.ema_indicator()) / close
        self.data['ema30'] = (close - EMA30.ema_indicator()) / close
        self.data['ema60'] = (close - EMA60.ema_indicator()) / close
        # MACD features are the MACD line minus its signal line.
        self.data['macd1226'] = MACD1226.macd() - MACD1226.macd_signal()
        self.data['macd2452'] = MACD2452.macd() - MACD2452.macd_signal()
        self.data['roc12'] = ROC12.roc()
        self.data['roc30'] = ROC30.roc()
        self.data['roc60'] = ROC60.roc()
        self.data['rsi14'] = RSI14.rsi()
        self.data['rsi28'] = RSI28.rsi()
        self.data['rsi60'] = RSI60.rsi()
        self.data['aroon25'] = AROON25.aroon_indicator()
        self.data['aroon50'] = AROON50.aroon_indicator()
        self.data['aroon80'] = AROON80.aroon_indicator()
        self.data['mfi14'] = MFI14.money_flow_index()
        self.data['mfi28'] = MFI28.money_flow_index()
        self.data['mfi80'] = MFI80.money_flow_index()
        self.data['cci20'] = CCI20.cci()
        self.data['cci40'] = CCI40.cci()
        self.data['cci100'] = CCI100.cci()
        self.data['willr14'] = WILLR14.wr()
        self.data['willr28'] = WILLR28.wr()
        self.data['willr60'] = WILLR60.wr()
        # Band features are the relative distance from close to each band.
        self.data['bband20up'] = (BBANDS20.bollinger_hband() - close) / close
        self.data['bband20down'] = (close - BBANDS20.bollinger_lband()) / close
        self.data['stoch14'] = STOCH14.stoch()
        self.data['stoch28'] = STOCH28.stoch()
        self.data['stoch60'] = STOCH60.stoch()
        self.data['cmi20'] = CMI20.chaikin_money_flow()
        self.data['cmi40'] = CMI40.chaikin_money_flow()
        self.data['cmi100'] = CMI100.chaikin_money_flow()
        self.data['kc20up'] = (KC20.keltner_channel_hband() - close) / close
        self.data['kc20down'] = (close - KC20.keltner_channel_lband()) / close
        return self.data
예제 #4
0
    def createDataset(self, symbol: str, startDate, endDate, useAllIndicators=True,
                      isAugmenting=False, timePeriodForOutputs=24):
        """
        Creates a dataset of model inputs (self.inputData) and labels
        (self.outputData) for the given symbol. Please make sure that the
        start and end dates are the beginnings of days.

        The method works in three stages:
          1. Walk from the (shifted) start date to the end date in steps of
             self._dataTimeInterval, collecting per-step close/volume means,
             the percentiles of the future prices, and the normalization means.
          2. Compute technical indicators (RSI, EMA, MACD, Bollinger bands,
             MFI) over the collected means.
          3. Slice the series into windows of self._numberOfSamples points,
             normalize them, and append them to self.inputData /
             self.outputData.

        Both self.inputData and self.outputData are reset to empty lists at
        the start of the call.

        :param symbol: e.g. "BTCUSDT"
        :param startDate: e.g. datetime(year=2020, month=1, day=1). Must be
                          naive: it is localized inside this method. Data is
                          actually read starting DAYS_IN_AN_INPUT + 60 days
                          earlier so the indicators have history.
        :param endDate: e.g. datetime(year=2020, month=2, day=1). Must be
                        naive as well.
        :param useAllIndicators: if False, only uses the minimum indicators
                                 (close, volume, rsi, ema, mfi).
        :param isAugmenting: used by createAugmentedDataset when augmenting.
                             When True, the input means are scaled by a small
                             random sinusoidal factor.
        :param timePeriodForOutputs: if set to 24, this will generate the labels
                                     (percentiles) for the next 24 hours after
                                     the input period (DAYS_IN_AN_INPUT days;
                                     the original note mentions 15 days).
        """
        # These are time-related variables.
        timezone = "Etc/GMT-0"
        timezone = pytz.timezone(timezone)
        # NOTE(review): outputStartDate is not used in the visible part of
        # this method.
        outputStartDate = startDate
        # We need to go back a little earlier to generate indicators such as RSI.
        startDate -= timedelta(days=DAYS_IN_AN_INPUT + 60)
        endDate = timezone.localize(endDate)
        startDate = timezone.localize(startDate)
        # outputStartDate = timezone.localize(outputStartDate)

        # We will be collecting our final features and labels in here:
        self.inputData = []
        self.outputData = []

        # This dataframe has all the raw data we need to generate the dataset.
        # The code below reads its "Timestamp", "Close" and "Volume" columns.
        df = self.dataObtainer.getHistoricalDataAsDataframe(symbol)

        # First, we will gather all of the means for our inputs...
        closeMeans = []
        volumeMeans = []

        # ... also, we will gather the outputs, which represent the
        # distributions of the next day prices.
        output15thPercentiles = []
        output25thPercentiles = []
        output35thPercentiles = []
        outputMedians = []
        output65thPercentiles = []
        output75thPercentiles = []
        output85thPercentiles = []

        # We will use this to normalize our outputs by dividing them by the
        # mean price of the last (latest/most recent) day in our input.
        priceMeansToDivideLabelsBy = []
        volumeMeansToDivideLabelsBy = []
        date = startDate

        # For augmentation: a random phase plus a slowly advancing counter
        # drive a sine wave that perturbs the input means.
        phaseShift = uniform(0, np.pi * 2)
        count = 0

        # Now we will be collecting the input prices, input volumes, and output
        # percentiles. Every path through the loop body appends exactly one
        # element to each of the lists above, so they stay index-aligned.
        while date < endDate:
            print("Processing", date, "/", endDate)
            # First, we will collect the start and end dates for this input
            # point (which consists of 3 hours of data if that is our input
            # time interval). Then we calculate the mean price and volume for
            # this input data point.
            # NOTE(review): each lookup below scans the whole "Timestamp"
            # column, so the loop is quadratic in the number of rows.
            startIndex = df.index[df["Timestamp"] == date].tolist()

            # If this if condition is true, then we may be missing some data in
            # our dataset. I think this happens during times when Binance was
            # down. In this case, we just use the previous data.
            # NOTE(review): if the gap occurs before anything has been
            # collected, the lists are empty and the [-1] lookups raise
            # IndexError. The same applies to every fallback branch below.
            if len(startIndex) == 0:
                date += self._dataTimeInterval
                closeMeans.append(closeMeans[-1])
                volumeMeans.append(volumeMeans[-1])
                outputMedians.append(outputMedians[-1])
                output15thPercentiles.append(output15thPercentiles[-1])
                output25thPercentiles.append(output25thPercentiles[-1])
                output35thPercentiles.append(output35thPercentiles[-1])
                output65thPercentiles.append(output65thPercentiles[-1])
                output75thPercentiles.append(output75thPercentiles[-1])
                output85thPercentiles.append(output85thPercentiles[-1])
                priceMeansToDivideLabelsBy.append(priceMeansToDivideLabelsBy[-1])
                volumeMeansToDivideLabelsBy.append(volumeMeansToDivideLabelsBy[-1])
                continue

            startIndex = startIndex[0]
            endIndex = df.index[df["Timestamp"] == date + self._dataTimeInterval].tolist()

            # Missing end of the input interval: same fallback as above.
            if len(endIndex) == 0:
                date += self._dataTimeInterval
                closeMeans.append(closeMeans[-1])
                volumeMeans.append(volumeMeans[-1])
                outputMedians.append(outputMedians[-1])
                output15thPercentiles.append(output15thPercentiles[-1])
                output25thPercentiles.append(output25thPercentiles[-1])
                output35thPercentiles.append(output35thPercentiles[-1])
                output65thPercentiles.append(output65thPercentiles[-1])
                output75thPercentiles.append(output75thPercentiles[-1])
                output85thPercentiles.append(output85thPercentiles[-1])
                priceMeansToDivideLabelsBy.append(priceMeansToDivideLabelsBy[-1])
                volumeMeansToDivideLabelsBy.append(volumeMeansToDivideLabelsBy[-1])
                continue

            endIndex = endIndex[0]
            # NOTE(review): index labels are used as positions here, which
            # assumes df has a default 0..n-1 index — TODO confirm.
            data = df.iloc[startIndex : endIndex]

            if isAugmenting:
                # Scale both means by 1 + sin(x) * r, with r drawn from
                # [0.02, 0.04], then advance the counter by a random step.
                x = phaseShift + count
                augmentation = 1 + np.sin(x) * uniform(0.02, 0.04)
                closeMeans.append(data["Close"].mean() * augmentation)
                volumeMeans.append(data["Volume"].mean() * augmentation)
                count += uniform(0.3, 0.6)

                # Restart the counter once it passes a full period.
                if count > 2 * np.pi:
                    count = 0

            else:
                closeMeans.append(data["Close"].mean())
                volumeMeans.append(data["Volume"].mean())

            # Now we get the start and end dates for output data that would
            # be associated with an entry that begins at the data point found
            # above. Then we calculate the percentiles for the output.
            date2 = date + timedelta(days=DAYS_IN_AN_INPUT)
            startIndex = df.index[df["Timestamp"] == date2].tolist()

            # The input means were already appended above, so from here on
            # the fallbacks only repeat the output and normalization lists.
            if len(startIndex) == 0:
                date += self._dataTimeInterval
                outputMedians.append(outputMedians[-1])
                output15thPercentiles.append(output15thPercentiles[-1])
                output25thPercentiles.append(output25thPercentiles[-1])
                output35thPercentiles.append(output35thPercentiles[-1])
                output65thPercentiles.append(output65thPercentiles[-1])
                output75thPercentiles.append(output75thPercentiles[-1])
                output85thPercentiles.append(output85thPercentiles[-1])
                priceMeansToDivideLabelsBy.append(priceMeansToDivideLabelsBy[-1])
                volumeMeansToDivideLabelsBy.append(volumeMeansToDivideLabelsBy[-1])
                continue

            startIndex = startIndex[0]
            date2 += timedelta(hours=timePeriodForOutputs)
            endIndex = df.index[df["Timestamp"] == date2].tolist()

            if len(endIndex) == 0:
                date += self._dataTimeInterval
                outputMedians.append(outputMedians[-1])
                output15thPercentiles.append(output15thPercentiles[-1])
                output25thPercentiles.append(output25thPercentiles[-1])
                output35thPercentiles.append(output35thPercentiles[-1])
                output65thPercentiles.append(output65thPercentiles[-1])
                output75thPercentiles.append(output75thPercentiles[-1])
                output85thPercentiles.append(output85thPercentiles[-1])
                priceMeansToDivideLabelsBy.append(priceMeansToDivideLabelsBy[-1])
                volumeMeansToDivideLabelsBy.append(volumeMeansToDivideLabelsBy[-1])
                continue

            endIndex = endIndex[0]
            # Close prices in the output window; their quantiles are the labels.
            data = df.iloc[startIndex: endIndex]["Close"]
            outputMedians.append(data.median())
            output15thPercentiles.append(data.quantile(0.15))
            output25thPercentiles.append(data.quantile(0.25))
            output35thPercentiles.append(data.quantile(0.35))
            output65thPercentiles.append(data.quantile(0.65))
            output75thPercentiles.append(data.quantile(0.75))
            output85thPercentiles.append(data.quantile(0.85))

            # Lastly, we need to get the last input day's mean price, which we
            # use to normalize our output percentiles.
            date3 = date + timedelta(days=DAYS_IN_AN_INPUT - 1)
            startIndex = df.index[df["Timestamp"] == date3].tolist()

            # Only the normalization lists remain to be filled at this point.
            if len(startIndex) == 0:
                date += self._dataTimeInterval
                priceMeansToDivideLabelsBy.append(priceMeansToDivideLabelsBy[-1])
                volumeMeansToDivideLabelsBy.append(volumeMeansToDivideLabelsBy[-1])
                continue

            startIndex = startIndex[0]
            date3 = date + timedelta(days=DAYS_IN_AN_INPUT)
            endIndex = df.index[df["Timestamp"] == date3].tolist()

            if len(endIndex) == 0:
                date += self._dataTimeInterval
                priceMeansToDivideLabelsBy.append(priceMeansToDivideLabelsBy[-1])
                volumeMeansToDivideLabelsBy.append(volumeMeansToDivideLabelsBy[-1])
                continue

            endIndex = endIndex[0]
            data = df.iloc[startIndex: endIndex]
            priceMeansToDivideLabelsBy.append(data["Close"].mean())
            volumeMeansToDivideLabelsBy.append(data["Volume"].mean())
            date += self._dataTimeInterval

        # Now that our while loop above collected data for inputs and
        # outputs, we need to generate technical indicators as additional
        # input features. We seem to be getting good performance if we only
        # use close, volume, rsi, ema and mfi, but we also have some other
        # indicators to play around with, such as ma and an additional rsi
        # with a different parameter.
        stock = StockDataFrame({
            "close": closeMeans,
            "volume": volumeMeans
        })

        # The standard RSI is 14 day. Note that if our time interval is 3 hrs,
        # there are 8 data points in a day. Thus, a 14 day RSI is a 112-RSI
        # because 14 * 8 = 112.
        # RSI and MFI are divided by 100 below.
        rsis = (stock["rsi:112"] / 100).tolist()
        rsis2 = (stock["rsi:14"] / 100).tolist()
        emas = (stock["ema:21"]).tolist()
        macds = stock["macd:96,208"].tolist()
        macds2 = stock["macd:24,52"].tolist()
        bollUppers = stock["boll.upper:160"].tolist()
        bollLowers = stock["boll.lower:160"].tolist()
        from ta.volume import MFIIndicator
        # Only close means are available here, so the close series is passed
        # as the high, low and close arguments.
        moneyFlowIndex = MFIIndicator(stock["close"], stock["close"], stock["close"], stock["volume"], window=14)
        mfis = (moneyFlowIndex.money_flow_index().divide(100)).to_list()

        # This gets rid of NANs in our indicators (just in case).
        import math
        rsis = [0 if math.isnan(x) else x for x in rsis]
        rsis2 = [0 if math.isnan(x) else x for x in rsis2]
        emas = [0 if math.isnan(x) else x for x in emas]
        macds = [0 if math.isnan(x) else x for x in macds]
        macds2 = [0 if math.isnan(x) else x for x in macds2]
        bollUppers = [0 if math.isnan(x) else x for x in bollUppers]
        bollLowers = [0 if math.isnan(x) else x for x in bollLowers]
        mfis = [0 if math.isnan(x) else x for x in mfis]

        # Now we will generate our final inputs and outputs! See the for loop
        # below.
        # Exclusive upper bound for the window start index, so that a full
        # window of self._numberOfSamples points fits in the series.
        entryAmount = int((len(closeMeans) - self._numberOfSamples - 1))

        # Step between consecutive entries: a whole day of data points when
        # self.dayByDay is set, otherwise a single data point.
        if self.dayByDay:
            advanceAmount = self._datapointsPerDay
        else:
            advanceAmount = 1

        def fixWithin0And1(x):
            # Clamp x into the closed interval [0, 1].
            return min(max(x, 0.0), 1.0)

        # The first 60 days of data points are skipped; presumably this is
        # the extra 60-day indicator warm-up added to startDate above —
        # TODO confirm.
        for i in range(60 * self._datapointsPerDay, entryAmount, advanceAmount):
            print("Percent of entries created: " + str(i / entryAmount * 100) + "%")
            yesterdayCloseMean = priceMeansToDivideLabelsBy[i]
            yesterdayVolumeMean = volumeMeansToDivideLabelsBy[i]
            # This gets the input features and outputs for this dataset entry.
            close = closeMeans[i : i + self._numberOfSamples]
            volume = volumeMeans[i : i + self._numberOfSamples]
            rsi = rsis[i : i + self._numberOfSamples]
            rsi2 = rsis2[i: i + self._numberOfSamples]
            ema = emas[i: i + self._numberOfSamples]
            macd = macds[i: i + self._numberOfSamples]
            macd2 = macds2[i: i + self._numberOfSamples]
            # Price-like features are divided by twice the reference mean and
            # clamped to [0, 1], so 0.5 means "equal to the reference mean".
            # MACD can be negative, so it is additionally shifted by 0.5.
            ema = [fixWithin0And1(m / yesterdayCloseMean / 2) for m in ema]
            macd = [fixWithin0And1(m / yesterdayCloseMean / 2 + 0.5) for m in macd]
            macd2 = [fixWithin0And1(m / yesterdayCloseMean / 2 + 0.5) for m in macd2]
            mfi = mfis[i: i + self._numberOfSamples]
            bollUpper = bollUppers[i: i + self._numberOfSamples]
            bollUpper = [fixWithin0And1(m / yesterdayCloseMean / 2) for m in bollUpper]
            bollLower = bollLowers[i: i + self._numberOfSamples]
            bollLower = [fixWithin0And1(m / yesterdayCloseMean / 2) for m in bollLower]

            # close and volume are slices (copies), so normalizing them in
            # place does not touch closeMeans / volumeMeans.
            for j in range(len(close)):
                close[j] = fixWithin0And1(close[j] / yesterdayCloseMean / 2)

            for j in range(len(volume)):
                volume[j] = fixWithin0And1(volume[j] / yesterdayVolumeMean / 2)

            # Finally, we add the entry to the dataset.
            if useAllIndicators:
                self.inputData.append([close, volume, rsi, rsi2, ema, macd, macd2,
                                       bollUpper, bollLower, mfi])
            else:
                self.inputData.append([close, volume, rsi, ema, mfi])

            # This normalizes our data. 0.5 means that the percentile is the same
            # as the last day's mean. 1.0 means that the percentile is twice the
            # value of the last day's mean. We normalize in this way so that we
            # can use the sigmoid activation function for the outputs, which
            # produces values between 0 and 1. Unlike the inputs, the labels
            # are not clamped.

            output15thPercentile = output15thPercentiles[i] / yesterdayCloseMean / 2
            output25thPercentile = output25thPercentiles[i] / yesterdayCloseMean / 2
            output35thPercentile = output35thPercentiles[i] / yesterdayCloseMean / 2
            outputMedian = outputMedians[i] / yesterdayCloseMean / 2
            output65thPercentile = output65thPercentiles[i] / yesterdayCloseMean / 2
            output75thPercentile = output75thPercentiles[i] / yesterdayCloseMean / 2
            output85thPercentile = output85thPercentiles[i] / yesterdayCloseMean / 2
            self.outputData.append([
                                    output15thPercentile,
                                    output25thPercentile,
                                    output35thPercentile,
                                    outputMedian,
                                    output65thPercentile,
                                    output75thPercentile,
                                    output85thPercentile
            ])