Beispiel #1
0
    def __dataframe(self):
        """Create an comprehensive list of  from data.

        Args:
            None

        Returns:
            result: dataframe for learning

        """
        # Calculate the percentage and real differences between columns
        difference = math.Difference(self._ohlcv)
        num_difference = difference.actual()
        pct_difference = difference.relative()

        # Create result to return.
        result = pd.DataFrame()

        # Add current value columns
        # NOTE Close must be first for correct correlation column dropping
        result['close'] = self._ohlcv['close']
        result['open'] = self._ohlcv['open']
        result['high'] = self._ohlcv['high']
        result['low'] = self._ohlcv['low']
        result['volume'] = self._ohlcv['volume']

        # Add columns of differences
        # NOTE Close must be first for correct correlation column dropping
        result['num_diff_close'] = num_difference['close']
        result['pct_diff_close'] = pct_difference['close']

        result['num_diff_open'] = num_difference['open']
        result['pct_diff_open'] = pct_difference['open']

        result['num_diff_high'] = num_difference['high']
        result['pct_diff_high'] = pct_difference['high']

        result['num_diff_low'] = num_difference['low']
        result['pct_diff_low'] = pct_difference['low']
        result['pct_diff_volume'] = pct_difference['volume']

        # Add date related columns
        # result['day'] = self._dates.day
        result['weekday'] = self._dates.weekday
        # result['week'] = self._dates.week
        result['month'] = self._dates.month
        result['quarter'] = self._dates.quarter
        # result['dayofyear'] = self._dates.dayofyear

        # Moving averages
        result['ma_open'] = result['open'].rolling(
            self._globals['ma_window']).mean()
        result['ma_high'] = result['high'].rolling(
            self._globals['ma_window']).mean()
        result['ma_low'] = result['low'].rolling(
            self._globals['ma_window']).mean()
        result['ma_close'] = result['close'].rolling(
            self._globals['ma_window']).mean()
        result['ma_volume'] = result['volume'].rolling(
            self._globals['vma_window']).mean()
        result['ma_volume_long'] = result['volume'].rolling(
            self._globals['vma_window_long']).mean()
        result['ma_volume_delta'] = result[
            'ma_volume_long'] - result['ma_volume']

        # Standard deviation related
        result['ma_std_close'] = result['close'].rolling(
            self._globals['ma_window']).std()
        result['std_pct_diff_close'] = result['pct_diff_close'].rolling(
            self._globals['ma_window']).std()
        result['bollinger_lband'] = volatility.bollinger_lband(result['close'])
        result['bollinger_hband'] = volatility.bollinger_lband(result['close'])
        result['bollinger_lband_indicator'] = volatility.bollinger_lband_indicator(result['close'])
        result['bollinger_hband_indicator'] = volatility.bollinger_hband_indicator(result['close'])

        # Rolling ranges
        result['amplitude'] = result['high'] - result['low']

        _min = result['low'].rolling(
            self._globals['week']).min()
        _max = result['high'].rolling(
            self._globals['week']).max()
        result['amplitude_medium'] = abs(_min - _max)

        _min = result['low'].rolling(
            2 * self._globals['week']).min()
        _max = result['high'].rolling(
            2 * self._globals['week']).max()
        result['amplitude_long'] = abs(_min - _max)

        _min = result['volume'].rolling(
            self._globals['week']).min()
        _max = result['volume'].rolling(
            self._globals['week']).max()
        result['vol_amplitude'] = abs(_min - _max)

        _min = result['volume'].rolling(
            2 * self._globals['week']).min()
        _max = result['volume'].rolling(
            2 * self._globals['week']).max()
        result['vol_amplitude_long'] = abs(_min - _max)

        # Volume metrics
        result['force_index'] = volume.force_index(
            result['close'], result['volume'])
        result['negative_volume_index'] = volume.negative_volume_index(
            result['close'], result['volume'])
        result['ease_of_movement'] = volume.ease_of_movement(
            result['high'], result['low'], result['close'], result['volume'])
        result['acc_dist_index'] = volume.acc_dist_index(
            result['high'], result['low'], result['close'], result['volume'])
        result['on_balance_volume'] = volume.on_balance_volume(
            result['close'], result['volume'])
        result['on_balance_volume_mean'] = volume.on_balance_volume(
            result['close'], result['volume'])
        result['volume_price_trend'] = volume.volume_price_trend(
            result['close'], result['volume'])

        # Calculate the Stochastic values
        result['k'] = momentum.stoch(
            result['high'],
            result['low'],
            result['close'],
            n=self._globals['kwindow'])

        result['d'] = momentum.stoch_signal(
            result['high'],
            result['low'],
            result['close'],
            n=self._globals['kwindow'],
            d_n=self._globals['dwindow'])

        # Calculate the Miscellaneous values
        result['rsi'] = momentum.rsi(
            result['close'],
            n=self._globals['rsiwindow'],
            fillna=False)

        miscellaneous = math.Misc(self._ohlcv)
        result['proc'] = miscellaneous.proc(self._globals['proc_window'])

        # Calculate ADX
        result['adx'] = trend.adx(
            result['high'],
            result['low'],
            result['close'],
            n=self._globals['adx_window'])

        # Calculate MACD difference
        result['macd_diff'] = trend.macd_diff(
            result['close'],
            n_fast=self._globals['macd_sign'],
            n_slow=self._globals['macd_slow'],
            n_sign=self._globals['macd_sign'])

        # Create series for increasing / decreasing closes (Convert NaNs to 0)
        _result = np.nan_to_num(result['pct_diff_close'].values)
        _increasing = (_result >= 0).astype(int) * self._buy
        _decreasing = (_result < 0).astype(int) * self._sell
        result['increasing'] = _increasing + _decreasing

        # Stochastic subtraciton
        result['k_d'] = pd.Series(result['k'].values - result['d'].values)

        # Other indicators
        result['k_i'] = self._stochastic_indicator(
            result['k'], result['high'], result['low'], result['ma_close'])
        result['d_i'] = self._stochastic_indicator(
            result['d'], result['high'], result['low'], result['ma_close'])
        result['stoch_i'] = self._stochastic_indicator_2(
            result['k'], result['d'],
            result['high'], result['low'], result['ma_close'])
        result['rsi_i'] = self._rsi_indicator(
            result['rsi'], result['high'], result['low'], result['ma_close'])
        result['adx_i'] = self._adx_indicator(result['adx'])
        result['macd_diff_i'] = self._macd_diff_indicator(result['macd_diff'])
        result['volume_i'] = self._volume_indicator(
            result['ma_volume'], result['ma_volume_long'])

        # Create time shifted columns
        for step in range(1, self._ignore_row_count + 1):
            result['t-{}'.format(step)] = result['close'].shift(step)
            result['tpd-{}'.format(step)] = result[
                'close'].pct_change(periods=step)
            result['tad-{}'.format(step)] = result[
                'close'].diff(periods=step)

        # Mask increasing with
        result['increasing_masked'] = _mask(
            result['increasing'].to_frame(),
            result['stoch_i'],
            as_integer=True).values

        # Get class values for each vector
        classes = pd.DataFrame(columns=self._shift_steps)
        for step in self._shift_steps:
            # Shift each column by the value of its label
            if self._binary is True:
                # Classes need to be 0 or 1 (One hot encoding)
                classes[step] = (
                    result[self._label2predict].shift(-step) > 0).astype(int)
            else:
                classes[step] = result[self._label2predict].shift(-step)
            # classes[step] = result[self._label2predict].shift(-step)

        # Delete the firsts row of the dataframe as it has NaN values from the
        # .diff() and .pct_change() operations
        ignore = max(max(self._shift_steps), self._ignore_row_count)
        result = result.iloc[ignore:]
        classes = classes.iloc[ignore:]

        # Convert result to float32 to conserve memory
        result = result.astype(np.float32)

        # Return
        return result, classes
Beispiel #2
0
    def __dataframe(self):
        """Create an comprehensive list of  from data.

        Args:
            None

        Returns:
            result: dataframe for learning

        """
        # Calculate the percentage and real differences between columns
        difference = math.Difference(self._ohlcv)
        num_difference = difference.actual()
        pct_difference = difference.relative()

        # Create result to return.
        result = pd.DataFrame()

        # Add current value columns
        result['open'] = self._ohlcv['open']
        result['high'] = self._ohlcv['high']
        result['low'] = self._ohlcv['low']
        result['close'] = self._ohlcv['close']
        result['volume'] = self._ohlcv['volume']

        # Add columns of differences
        result['num_diff_open'] = num_difference['open']
        result['num_diff_high'] = num_difference['high']
        result['num_diff_low'] = num_difference['low']
        result['num_diff_close'] = num_difference['close']
        result['pct_diff_open'] = pct_difference['open']
        result['pct_diff_high'] = pct_difference['high']
        result['pct_diff_low'] = pct_difference['low']
        result['pct_diff_close'] = pct_difference['close']
        result['pct_diff_volume'] = pct_difference['volume']

        # Add date related columns
        # result['day'] = self._dates.day
        result['weekday'] = self._dates.weekday
        # result['week'] = self._dates.week
        result['month'] = self._dates.month
        result['quarter'] = self._dates.quarter
        # result['dayofyear'] = self._dates.dayofyear

        # Moving averages
        result['ma_open'] = result['open'].rolling(
            self._globals['ma_window']).mean()
        result['ma_high'] = result['high'].rolling(
            self._globals['ma_window']).mean()
        result['ma_low'] = result['low'].rolling(
            self._globals['ma_window']).mean()
        result['ma_close'] = result['close'].rolling(
            self._globals['ma_window']).mean()
        result['ma_volume'] = result['volume'].rolling(
            self._globals['vma_window']).mean()
        result['ma_volume_long'] = result['volume'].rolling(
            self._globals['vma_window_long']).mean()
        result[
            'ma_volume_delta'] = result['ma_volume_long'] - result['ma_volume']

        # Standard deviation related
        result['ma_std_close'] = result['close'].rolling(
            self._globals['ma_window']).std()
        result['std_pct_diff_close'] = result['pct_diff_close'].rolling(
            self._globals['ma_window']).std()
        result['bollinger_lband'] = volatility.bollinger_lband(result['close'])
        result['bollinger_hband'] = volatility.bollinger_lband(result['close'])
        result[
            'bollinger_lband_indicator'] = volatility.bollinger_lband_indicator(
                result['close'])
        result[
            'bollinger_hband_indicator'] = volatility.bollinger_hband_indicator(
                result['close'])

        # Rolling ranges
        result['amplitude'] = result['high'] - result['low']

        _min = result['low'].rolling(self._globals['week']).min()
        _max = result['high'].rolling(self._globals['week']).max()
        result['amplitude_medium'] = abs(_min - _max)

        _min = result['low'].rolling(2 * self._globals['week']).min()
        _max = result['high'].rolling(2 * self._globals['week']).max()
        result['amplitude_long'] = abs(_min - _max)

        _min = result['volume'].rolling(self._globals['week']).min()
        _max = result['volume'].rolling(self._globals['week']).max()
        result['vol_amplitude'] = abs(_min - _max)

        _min = result['volume'].rolling(2 * self._globals['week']).min()
        _max = result['volume'].rolling(2 * self._globals['week']).max()
        result['vol_amplitude_long'] = abs(_min - _max)

        # Volume metrics
        result['force_index'] = volume.force_index(result['close'],
                                                   result['volume'])
        result['negative_volume_index'] = volume.negative_volume_index(
            result['close'], result['volume'])
        result['ease_of_movement'] = volume.ease_of_movement(
            result['high'], result['low'], result['close'], result['volume'])
        result['acc_dist_index'] = volume.acc_dist_index(
            result['high'], result['low'], result['close'], result['volume'])
        result['on_balance_volume'] = volume.on_balance_volume(
            result['close'], result['volume'])
        result['on_balance_volume_mean'] = volume.on_balance_volume(
            result['close'], result['volume'])
        result['volume_price_trend'] = volume.volume_price_trend(
            result['close'], result['volume'])

        # Calculate the Stochastic values
        result['k'] = momentum.stoch(result['high'],
                                     result['low'],
                                     result['close'],
                                     n=self._globals['kwindow'])

        result['d'] = momentum.stoch_signal(result['high'],
                                            result['low'],
                                            result['close'],
                                            n=self._globals['kwindow'],
                                            d_n=self._globals['dwindow'])

        # Calculate the Miscellaneous values
        result['rsi'] = momentum.rsi(result['close'],
                                     n=self._globals['rsiwindow'],
                                     fillna=False)

        miscellaneous = math.Misc(self._ohlcv)
        result['proc'] = miscellaneous.proc(self._globals['proc_window'])

        # Calculate ADX
        result['adx'] = trend.adx(result['high'],
                                  result['low'],
                                  result['close'],
                                  n=self._globals['adx_window'])

        # Calculate MACD difference
        result['macd_diff'] = trend.macd_diff(
            result['close'],
            n_fast=self._globals['macd_sign'],
            n_slow=self._globals['macd_slow'],
            n_sign=self._globals['macd_sign'])

        # Create series for increasing / decreasing closes (Convert NaNs to 0)
        _result = np.nan_to_num(result['pct_diff_close'].values)
        _increasing = (_result >= 0).astype(int) * self._buy
        _decreasing = (_result < 0).astype(int) * self._sell
        result['increasing'] = _increasing + _decreasing

        # Stochastic subtraciton
        result['k_d'] = pd.Series(result['k'].values - result['d'].values)

        # Other indicators
        result['k_i'] = self._stochastic_indicator(result['k'], result['high'],
                                                   result['low'],
                                                   result['ma_close'])
        result['d_i'] = self._stochastic_indicator(result['d'], result['high'],
                                                   result['low'],
                                                   result['ma_close'])
        result['stoch_i'] = self._stochastic_indicator_2(
            result['k'], result['d'], result['high'], result['low'],
            result['ma_close'])
        result['rsi_i'] = self._rsi_indicator(result['rsi'], result['high'],
                                              result['low'],
                                              result['ma_close'])
        result['adx_i'] = self._adx_indicator(result['adx'])
        result['macd_diff_i'] = self._macd_diff_indicator(result['macd_diff'])
        result['volume_i'] = self._volume_indicator(result['ma_volume'],
                                                    result['ma_volume_long'])

        # Create time shifted columns
        for step in range(1, self._ignore_row_count + 1):
            # result['t-{}'.format(step)] = result['close'].shift(step)
            result['tpd-{}'.format(step)] = result['close'].pct_change(
                periods=step)
            # result['tad-{}'.format(step)] = result[
            #    'close'].diff(periods=step)

        # Mask increasing with
        result['increasing_masked'] = _mask(result['increasing'].to_frame(),
                                            result['stoch_i'],
                                            as_integer=True).values

        # Get class values for each vector
        classes = pd.DataFrame(columns=self._shift_steps)
        for step in self._shift_steps:
            # Shift each column by the value of its label
            classes[step] = result[self._label2predict].shift(-step)

        # Remove all undesirable columns from the dataframe
        undesired_columns = ['open', 'close', 'high', 'low', 'volume']
        for column in undesired_columns:
            result = result.drop(column, axis=1)

        # Delete the firsts row of the dataframe as it has NaN values from the
        # .diff() and .pct_change() operations
        result = result.iloc[self._ignore_row_count:]
        classes = classes.iloc[self._ignore_row_count:]

        # Convert result to float32 to conserve memory
        result = result.astype(np.float32)

        # Return
        return result, classes
Beispiel #3
0
 def test_lband_indicator2(self):
     target = 'CrossDown'
     result = bollinger_lband_indicator(**self._params)
     pd.testing.assert_series_equal(self._df[target].tail(),
                                    result.tail(),
                                    check_names=False)