コード例 #1
0
ファイル: predictor.py プロジェクト: atremblay/MLND
    def __init__(self, ticker, over=7):
        """Store the ticker settings and build the per-instance transformers.

        Args:
            ticker: Identifier of the stock; kept unchanged on ``self.ticker``.
            over: Kept unchanged on ``self.over`` (defaults to 7).
                NOTE(review): presumably the return horizon used when
                building labels -- confirm against the callers.
        """
        super(Stock, self).__init__()
        self.ticker, self.over = ticker, over

        # Column names handed to the MaxRescale helper.
        rescaled_columns = [
            'Adjusted Close', 'Volume', 'Open', 'Low', 'High', 'SP Close']
        self.rescale = MaxRescale(rescaled_columns)

        # Two Bollinger band helpers, one per window length (10 then 5).
        self.bollinger10, self.bollinger5 = (
            BollingerBand(window=10), BollingerBand(window=5))
コード例 #2
0
ファイル: predictor.py プロジェクト: atremblay/MLND
class Stock(object):
    """Build the feature table and the label series for a single ticker.

    ``get_data`` fetches (or reloads from a CSV cache) the raw quotes,
    augments them with the S&P 500 close, derives a set of technical
    indicator columns and returns the features together with the labels
    produced by ``Return(self.over)``.
    """

    def __init__(self, ticker, over=7):
        """Store the ticker settings and build the per-instance transformers.

        Args:
            ticker: Identifier passed to ``QuandlAPI.get_data`` and used in
                the cache file names.
            over: Argument handed to ``Return`` when the labels are built
                (defaults to 7).
        """
        super(Stock, self).__init__()
        self.ticker = ticker
        self.over = over

        self.rescale = MaxRescale(
            ['Adjusted Close', 'Volume', 'Open', 'Low', 'High', 'SP Close'])
        self.bollinger10 = BollingerBand(window=10)
        # Built here but not applied by get_data (its call there is disabled).
        self.bollinger5 = BollingerBand(window=5)

    def get_data(self, start_date, end_date, fit=False, cache=True):
        """Return ``(data, label)`` restricted to ``start_date``..``end_date``.

        Args:
            start_date: Lower bound of the period. It is formatted into the
                cache file names, passed to ``QuandlAPI.get_data`` and used
                as a slice bound on the index.
            end_date: Upper bound of the period, used the same way.
            fit: When true, ``self.rescale`` is fitted on this data before
                it is used to transform it.
            cache: When true, reuse the two CSV cache files in the current
                working directory if both exist, and write them after a
                fresh download otherwise.

        Returns:
            A ``(data, label)`` pair sliced to the same index range. The
            ``'Close'`` column is removed from ``data``.
        """
        d_file_name = "d{}_{}_{}.csv".format(start_date, end_date, self.ticker)
        l_file_name = "l{}_{}_{}.csv".format(start_date, end_date, self.ticker)

        if cache and os.path.exists(d_file_name) and os.path.exists(l_file_name):
            # parse_dates=True parses the index column (index_col=0); a bare
            # string such as 'Date' is rejected by pandas with a TypeError.
            data = pd.read_csv(d_file_name, index_col=0, parse_dates=True)
            label = pd.read_csv(
                l_file_name,
                index_col=0,
                parse_dates=True,
                header=None)
            # Same effect as the removed ``squeeze=True`` option: a single
            # parsed column is returned as a Series.
            if label.shape[1] == 1:
                label = label.iloc[:, 0]
        else:
            # NOTE(review): the meaning of the two trailing numeric arguments
            # is defined by QuandlAPI.get_data -- confirm there.
            data = QuandlAPI.get_data(self.ticker, start_date, end_date, 160, 15)
            label = Return(self.over).transform(data)

            #######################
            # Augment with index  #
            #######################
            sp500 = QuandlAPI.get_data(
                "INDEX_GSPC", start_date, end_date, 75, 15)
            data = data.join(sp500['Adj Close'], how='left')
            data.rename(columns={'Adj Close': 'SP Close'}, inplace=True)

            # Deal with outliers.
            # The fence is centred on the median (the '50%' row) and spans
            # 1.5 * IQR on each side. Boolean-frame indexing turns every
            # value outside the fence into NaN; the affected rows are
            # removed by the dropna() further down.
            desc = data.describe()
            iqr = desc.loc['75%'] - desc.loc['25%']
            upper_fence = desc.loc['50%'] + 1.5*iqr
            lower_fence = desc.loc['50%'] - 1.5*iqr
            data = data[data <= upper_fence]
            data = data[data >= lower_fence]

            # Cache the raw data
            if cache:
                data.to_csv(d_file_name, index=True)
                # header=False keeps the file consistent with the
                # header=None read above regardless of the pandas default.
                label.to_csv(l_file_name, index=True, header=False)

        ##########
        # Adjust #
        ##########
        # TODO There may be a problem when subsequent calls are made.
        # The adjustment is always relative to the last date, so two
        # calls over two different periods will be adjusted differently.
        # Not sure if it matters or not.
        ratio = data['Adjusted Close'] / data['Close']
        data['High'] = data['High']*ratio
        data['Low'] = data['Low']*ratio
        data['Open'] = data['Open']*ratio

        ##################
        # Rescale by max #
        ##################
        if fit:
            self.rescale.fit(data, start_date, end_date)
        data = self.rescale.transform(data)

        ######################
        # Average True Range #
        ######################
        data['ATR'] = ATR(data, 14)

        ##################
        # Bollinger band #
        ##################
        data = self.bollinger10(data)
        # data = self.bollinger5(data)

        #################
        # Rolling stats #
        #################
        data = high(22, data)  # Chandelier
        data = low(22, data)  # Chandelier

        # Ichimoku Clouds
        # http://stockcharts.com/school/doku.php?id=chart_school:technical_indicators:ichimoku_cloud
        data['tenkansen9'] = sen(data, 9)
        data['kijunsen26'] = sen(data, 26)
        # Possibly redundant: this is an affine combination of the two
        # features just above.
        data['senkouA'] = (data['tenkansen9'] + data['kijunsen26']) / 2.
        data['senkouB52'] = sen(data, 52)
        data['chikou26'] = data['Adjusted Close'].shift(26)

        # Moving Average
        # http://stockcharts.com/school/doku.php?id=chart_school:technical_indicators:moving_averages
        data['ema10'] = ema(data, 10)
        data['ema10-100'] = ema(data, 10) / ema(data, 100)
        data['ema25-100'] = ema(data, 25) / ema(data, 100)
        data['ema50-100'] = ema(data, 50) / ema(data, 100)
        data['kama'] = kama(data)
        data['ret_over2'] = return_over(data, 2)

        # RSI
        # http://stockcharts.com/school/doku.php?id=chart_school:technical_indicators:relative_strength_index_rsi
        data['rsi'] = rsi(data)

        # Price Volume Oscillator
        # http://stockcharts.com/school/doku.php?id=chart_school:technical_indicators:percentage_volume_oscillator_pvo
        # NOTE(review): the difference is divided by the 10-period EMA here;
        # confirm this is the intended denominator.
        ema_vol10 = ema(data, 10, 'Volume')
        ema_vol26 = ema(data, 26, 'Volume')
        data['pvo'] = (ema_vol10-ema_vol26)/ema_vol10

        # Make sure both indexes are aligned: drop incomplete rows, clip to
        # the requested period, then keep only the range covered by both
        # the features and the labels.
        data.dropna(inplace=True)
        label.dropna(inplace=True)
        data, label = data[start_date:end_date], label[start_date:end_date]

        data_index, label_index = data.index, label.index
        max_start = max(data_index[0], label_index[0])
        min_end = min(data_index[-1], label_index[-1])
        data, label = data[max_start: min_end], label[max_start: min_end]

        del data['Close']
        # del data['Volume']
        return data, label