Esempio n. 1
0
    def load_data(self, predict='silver', add_stock=None, years=None):
        '''
        Loads the stock prices and GDELT features used as states

        ::param predict str name of the stock stored in self.stock_to_predict
                ('silver', 'gold', 'platinum' or 'palladium')
        ::param add_stock List ['gold', 'platinum'] etc - each listed stock is
                appended to the GDELT features as one extra column
                (None, the default, appends nothing)
        ::param years List of years to load (None means [2016, 2017])
        '''
        #None sentinels replace the old list defaults so that a single list
        #object is never shared between calls
        if add_stock is None:
            add_stock = []
        if years is None:
            years = [2016, 2017]

        #Load up Stocks
        self.stock_proc = StockProcessor()
        #Silver is loaded with norm and remove_trend switched off,
        #the other three metals with both switched on
        self.silver_prices, _ = self.stock_proc.gen_stock_data(
            years, 'silver', plot=False, norm=False, remove_trend=False)
        self.gold_prices, _ = self.stock_proc.gen_stock_data(
            years, 'gold', plot=False, norm=True, remove_trend=True)
        self.platinum_prices, _ = self.stock_proc.gen_stock_data(
            years, 'platinum', plot=False, norm=True, remove_trend=True)
        self.palladium_prices, _ = self.stock_proc.gen_stock_data(
            years, 'palladium', plot=False, norm=True, remove_trend=True)
        logging.info('Loaded Stock')
        #These will be added to the GDELT feats
        self.all_stock_prices = {
            'silver': self.silver_prices,
            'gold': self.gold_prices,
            'platinum': self.platinum_prices,
            'palladium': self.palladium_prices
        }

        #KeyError here means `predict` is not one of the four loaded stocks
        self.stock_to_predict = self.all_stock_prices[predict]

        #Init GDELT
        self.gdelt_proc = GdeltProcessor()
        self.states = self.gdelt_proc.load_features(years)
        #Concatenate all GDELT years along axis 0.
        #Every requested year is joined; the earlier version always read
        #entries 0, 1 and 2, which only fits a request for exactly three years.
        #Assumes load_features returns one array per requested year - TODO confirm
        if len(years) > 1:
            self.states = np.concatenate(
                [self.states[idx] for idx in range(len(years))], axis=0)
        else:
            self.states = self.states[0]
        logging.debug('GDELT Shape Before Prices: %s', self.states.shape)

        for stock_name in add_stock:
            #Add the stock prices to the GDELT Features as a single column
            stock = self.all_stock_prices[stock_name]
            stock = stock.reshape(stock.shape[0], 1)
            logging.debug('GDELT and Prices %s, %s', self.states.shape,
                          stock.shape)
            self.states = np.concatenate((self.states, stock), axis=1)
            logging.debug('GDELT After Prices: %s', self.states.shape)
Esempio n. 2
0
    def load_data(
            self,
            predict='silver',
            add_stock=None,
            years=None,
            categories=None,
            forward_window_size=5,
            backward_window_size=5):
        '''
        Loads the stock and GDELT features, windows them and generates labels

        ::param predict str name of the stock the labels are generated for
        ::param add_stock List ['gold', 'platinum'] etc
                NOTE: currently not consulted - every loaded stock is appended
                to the features
        ::param years List of years to load (None means [2016, 2017])
        ::param categories List of category boundaries passed through to
                gen_stock_data for labelling
                (None means [-20, -10, -5, -2, 0, 2, 5, 10, 20] as floats)
        ::forward_window_size int number of days to look forward to produce labels
        ::backward_window_size int number of day to look back for producing data to predict the look forward
        '''
        #None sentinels replace the old list defaults so that a single list
        #object is never shared between calls
        if years is None:
            years = [2016, 2017]
        if categories is None:
            categories = [-20.0, -10.0, -5.0, -2.0, 0.0, 2.0, 5.0, 10.0, 20.0]

        #Load up Stocks - for features
        self.stock_proc = StockProcessor()
        self.silver_prices, _, _ = self.stock_proc.gen_stock_data(
            years, 'silver', plot=False, norm=True, remove_trend=True)
        self.gold_prices, _, _ = self.stock_proc.gen_stock_data(
            years, 'gold', plot=False, norm=True, remove_trend=True)
        self.platinum_prices, _, _ = self.stock_proc.gen_stock_data(
            years, 'platinum', plot=False, norm=True, remove_trend=True)
        self.palladium_prices, _, _ = self.stock_proc.gen_stock_data(
            years, 'palladium', plot=False, norm=True, remove_trend=True)
        logging.info('Loaded Stock')
        #These will be added to the GDELT feats
        self.all_stock_prices = {
            'silver': self.silver_prices,
            'gold': self.gold_prices,
            'platinum': self.platinum_prices,
            'palladium': self.palladium_prices
        }

        #The *_labels attributes are only read here, never assigned in this
        #method, so they must already exist on the instance
        self.all_stock_labels = {
            'silver': self.silver_labels,
            'gold': self.gold_labels,
            'platinum': self.platinum_labels,
            'palladium': self.palladium_labels
        }

        #Load up GDELT features
        self.gdelt_proc = GdeltProcessor()
        self.gdelt_feats = self.gdelt_proc.load_features(years)
        #Concatenate all GDELT years
        if len(years) > 1:
            yearly = [self.gdelt_feats[idx] for idx in range(len(years))]
            concat = np.concatenate(yearly, axis=0)
            #Assert we have correctly concatenated years: no rows lost.
            #(This used to be two bare comparisons that checked nothing.)
            assert concat.shape[0] == sum(feats.shape[0] for feats in yearly)
            self.gdelt_feats = concat
        else:
            self.gdelt_feats = self.gdelt_feats[0]
        logging.debug('GDELT Shape: %s', self.gdelt_feats.shape)
        #Show some data - each stock needs one price per GDELT row
        for key, prices in self.all_stock_prices.items():
            logging.debug('Stock Shapes: %s, %s', key, prices.shape)
            assert prices.shape[0] == self.gdelt_feats.shape[0]

        #Firstly concat the gdelt and stocks - one extra column per stock
        stock_columns = [
            prices.reshape(prices.shape[0], 1)
            for prices in self.all_stock_prices.values()
        ]
        self.final_feats = np.concatenate([self.gdelt_feats] + stock_columns,
                                          axis=1)
        assert self.final_feats.shape[1] == len(
            self.all_stock_prices) + self.gdelt_feats.shape[1]
        logging.debug('Concat features shape: %s, %s', self.final_feats.shape,
                      len(self.all_stock_prices))

        #Now window into predicter features - this is complicated...
        #We start at 0+backward_window_size - so 0:backward_window_size is our first feature window
        #Then the label window for this will be 0+backward_window_size:0+backward_window_size+forward_window_size
        #Then slide by day
        #So first create the feature windows
        self.final_feat_windows = self.window_features(self.final_feats,
                                                       backward_window_size)
        logging.debug('Feat Window shape: %s', self.final_feat_windows.shape)

        #Now we load the predictor stock data and generate labels.
        #The stock named by `predict` is used; this was hard-coded to 'silver'
        #which made the parameter a no-op.
        #NOTE(review): two values are unpacked here while the feature loads
        #above unpack three - confirm what gen_stock_data returns when
        #categories/window_size are supplied
        self.predictor_price_windows, self.labels = self.stock_proc.gen_stock_data(
            years,
            predict,
            plot=False,
            norm=False,
            remove_trend=False,
            categories=categories,
            window_size=forward_window_size)

        #Now the feature windows start at 0 but the predictor windows start at one step in
        #so drop the first label and the last feature window to line them up
        self.labels = self.labels[1:]
        self.labels_unique = np.unique(self.labels)
        logging.debug('Actual Unique Labels: %s', self.labels_unique)
        self.final_feat_windows = self.final_feat_windows[:-1]
        assert len(self.labels) == self.final_feat_windows.shape[0]
        #1 hot the labels
        self.labels_1hot = to_categorical(self.labels)
        logging.debug(self.labels_1hot.shape)

        logging.debug('Labels length: %s', len(self.labels))
        logging.debug('Windows shape: %s', self.final_feat_windows.shape)
Esempio n. 3
0
class StockEnv():
    '''
    Loads metal prices and GDELT features, slices them into day windows and
    builds a keras classifier over those windows.
    '''
    def __init__(self, ):
        #Processors - created in load_data
        self.stock_proc = None
        self.gdelt_proc = None
        self.states = None
        #Per stock prices and labels
        self.silver_prices = None
        self.gold_prices = None
        self.platinum_prices = None
        self.palladium_prices = None
        self.silver_labels = None
        self.gold_labels = None
        self.platinum_labels = None
        self.palladium_labels = None
        self.all_stock_prices = None
        self.all_stock_labels = None
        #Features
        self.gdelt_feats = None
        self.all_feats = None
        #GDELT feats with one column per stock appended - set by load_data
        self.final_feats = None
        self.final_feat_windows = None
        self.predictor_price_windows = None
        #Labels and model
        self.labels = None
        self.model = None
        self.labels_1hot = None
        self.labels_unique = None

    def build_model(self, input_shape, num_classes):
        '''
        Build the keras model

        ::param input_shape shape handed to the first Conv1D layer
        ::param num_classes int width of the softmax output layer
        ::return the compiled model (also stored on self.model)
        '''
        self.model = Sequential()
        self.model.add(
            Conv1D(filters=128,
                   kernel_size=1,
                   input_shape=input_shape,
                   activation='relu'))
        self.model.add(Flatten())
        self.model.add(Dropout(0.4))
        self.model.add(Dense(1024, activation='relu'))
        self.model.add(Dense(num_classes, activation='softmax'))
        #Alternative architecture kept for reference:
        #self.model = Sequential()
        #self.model.add(Conv1D(64, 1, activation='relu', input_shape=input_shape))
        #self.model.add(Conv1D(64, 1, activation='relu'))
        #self.model.add(MaxPooling1D(1))
        #self.model.add(Conv1D(128, 1, activation='relu'))
        #self.model.add(Conv1D(128, 1, activation='relu'))
        #self.model.add(GlobalAveragePooling1D())
        #self.model.add(Dropout(0.5))
        #self.model.add(Dense(num_classes, activation='softmax'))
        self.model.compile(optimizer='rmsprop',
                           loss='categorical_crossentropy',
                           metrics=['accuracy'])

        return self.model

    def load_data(
            self,
            predict='silver',
            add_stock=None,
            years=None,
            categories=None,
            forward_window_size=5,
            backward_window_size=5):
        '''
        Loads the stock and GDELT features, windows them and generates labels

        ::param predict str name of the stock the labels are generated for
        ::param add_stock List ['gold', 'platinum'] etc
                NOTE: currently not consulted - every loaded stock is appended
                to the features
        ::param years List of years to load (None means [2016, 2017])
        ::param categories List of category boundaries passed through to
                gen_stock_data for labelling
                (None means [-20, -10, -5, -2, 0, 2, 5, 10, 20] as floats)
        ::forward_window_size int number of days to look forward to produce labels
        ::backward_window_size int number of day to look back for producing data to predict the look forward
        '''
        #None sentinels replace the old list defaults so that a single list
        #object is never shared between calls
        if years is None:
            years = [2016, 2017]
        if categories is None:
            categories = [-20.0, -10.0, -5.0, -2.0, 0.0, 2.0, 5.0, 10.0, 20.0]

        #Load up Stocks - for features
        self.stock_proc = StockProcessor()
        self.silver_prices, _, _ = self.stock_proc.gen_stock_data(
            years, 'silver', plot=False, norm=True, remove_trend=True)
        self.gold_prices, _, _ = self.stock_proc.gen_stock_data(
            years, 'gold', plot=False, norm=True, remove_trend=True)
        self.platinum_prices, _, _ = self.stock_proc.gen_stock_data(
            years, 'platinum', plot=False, norm=True, remove_trend=True)
        self.palladium_prices, _, _ = self.stock_proc.gen_stock_data(
            years, 'palladium', plot=False, norm=True, remove_trend=True)
        logging.info('Loaded Stock')
        #These will be added to the GDELT feats
        self.all_stock_prices = {
            'silver': self.silver_prices,
            'gold': self.gold_prices,
            'platinum': self.platinum_prices,
            'palladium': self.palladium_prices
        }

        #The *_labels attributes are not assigned in this method, so these
        #hold whatever is on the instance (None straight after __init__)
        self.all_stock_labels = {
            'silver': self.silver_labels,
            'gold': self.gold_labels,
            'platinum': self.platinum_labels,
            'palladium': self.palladium_labels
        }

        #Load up GDELT features
        self.gdelt_proc = GdeltProcessor()
        self.gdelt_feats = self.gdelt_proc.load_features(years)
        #Concatenate all GDELT years
        if len(years) > 1:
            yearly = [self.gdelt_feats[idx] for idx in range(len(years))]
            concat = np.concatenate(yearly, axis=0)
            #Assert we have correctly concatenated years: no rows lost.
            #(This used to be two bare comparisons that checked nothing.)
            assert concat.shape[0] == sum(feats.shape[0] for feats in yearly)
            self.gdelt_feats = concat
        else:
            self.gdelt_feats = self.gdelt_feats[0]
        logging.debug('GDELT Shape: %s', self.gdelt_feats.shape)
        #Show some data - each stock needs one price per GDELT row
        for key, prices in self.all_stock_prices.items():
            logging.debug('Stock Shapes: %s, %s', key, prices.shape)
            assert prices.shape[0] == self.gdelt_feats.shape[0]

        #Firstly concat the gdelt and stocks - one extra column per stock
        stock_columns = [
            prices.reshape(prices.shape[0], 1)
            for prices in self.all_stock_prices.values()
        ]
        self.final_feats = np.concatenate([self.gdelt_feats] + stock_columns,
                                          axis=1)
        assert self.final_feats.shape[1] == len(
            self.all_stock_prices) + self.gdelt_feats.shape[1]
        logging.debug('Concat features shape: %s, %s', self.final_feats.shape,
                      len(self.all_stock_prices))

        #Now window into predicter features - this is complicated...
        #We start at 0+backward_window_size - so 0:backward_window_size is our first feature window
        #Then the label window for this will be 0+backward_window_size:0+backward_window_size+forward_window_size
        #Then slide by day
        #So first create the feature windows
        self.final_feat_windows = self.window_features(self.final_feats,
                                                       backward_window_size)
        logging.debug('Feat Window shape: %s', self.final_feat_windows.shape)

        #Now we load the predictor stock data and generate labels.
        #The stock named by `predict` is used; this was hard-coded to 'silver'
        #which made the parameter a no-op.
        #NOTE(review): two values are unpacked here while the feature loads
        #above unpack three - confirm what gen_stock_data returns when
        #categories/window_size are supplied
        self.predictor_price_windows, self.labels = self.stock_proc.gen_stock_data(
            years,
            predict,
            plot=False,
            norm=False,
            remove_trend=False,
            categories=categories,
            window_size=forward_window_size)

        #Now the feature windows start at 0 but the predictor windows start at one step in
        #so drop the first label and the last feature window to line them up
        self.labels = self.labels[1:]
        self.labels_unique = np.unique(self.labels)
        logging.debug('Actual Unique Labels: %s', self.labels_unique)
        self.final_feat_windows = self.final_feat_windows[:-1]
        assert len(self.labels) == self.final_feat_windows.shape[0]
        #1 hot the labels
        self.labels_1hot = to_categorical(self.labels)
        logging.debug(self.labels_1hot.shape)

        logging.debug('Labels length: %s', len(self.labels))
        logging.debug('Windows shape: %s', self.final_feat_windows.shape)

        #Logged here, once loading has finished. It previously sat in the
        #class body and so fired when the class was defined instead.
        logging.info('Completed Data Load')

    def window_features(self, features, window_size):
        '''
        Change day features into windows of given size.
        Each window contains all the feature rows for that window

        ::param features array with one row per day
        ::param window_size int number of consecutive rows per window
        ::return array of windows, window i holding rows i..i+window_size-1.
                 Empty when there are not more rows than window_size.
        '''
        #One window per start row that leaves at least one row after the window,
        #so the window ending on the very last row is not produced.
        #load_data's trimming of labels and windows relies on this count.
        num_windows = max(features.shape[0] - window_size, 0)
        return np.array([
            features[start:start + window_size]
            for start in range(num_windows)
        ])
Esempio n. 4
0
    def load_data(self,
                  predict='silver',
                  add_stock=None,
                  years=None,
                  forward_window_size=5,
                  backward_window_size=5):
        '''
        Loads the stock and GDELT features and turns them into per window
        signal features with one label per window

        ::param predict str name of the stock the labels are generated from
        ::param add_stock List ['gold', 'platinum'] etc
                NOTE: currently not consulted - every loaded stock is appended
                to the features
        ::param years List of years to load (None means [2016, 2017])
        ::forward_window_size int number of days to look forward to produce labels
        ::backward_window_size int number of day to look back for producing data to predict the look forward
        ::return (signal_feats, labels) arrays with the same number of rows
        '''
        #None sentinel replaces the old list default so that a single list
        #object is never shared between calls
        if years is None:
            years = [2016, 2017]

        #Load up Stocks - for features
        self.stock_proc = StockProcessor()
        self.silver_prices, _, _ = self.stock_proc.gen_stock_data(
            years, 'silver', plot=False, norm=True, remove_trend=True)
        self.gold_prices, _, _ = self.stock_proc.gen_stock_data(
            years, 'gold', plot=False, norm=True, remove_trend=True)
        self.platinum_prices, _, _ = self.stock_proc.gen_stock_data(
            years, 'platinum', plot=False, norm=True, remove_trend=True)
        self.palladium_prices, _, _ = self.stock_proc.gen_stock_data(
            years, 'palladium', plot=False, norm=True, remove_trend=True)
        logging.info('Loaded Stock')
        #These will be added to the GDELT feats
        self.all_stock_prices = {
            'silver': self.silver_prices,
            'gold': self.gold_prices,
            'platinum': self.platinum_prices,
            'palladium': self.palladium_prices
        }

        #Load up GDELT features
        self.gdelt_proc = GdeltProcessor()
        self.gdelt_feats = self.gdelt_proc.load_features(years)
        #Concatenate all GDELT years
        if len(years) > 1:
            yearly = [self.gdelt_feats[idx] for idx in range(len(years))]
            concat = np.concatenate(yearly, axis=0)
            #Assert we have correctly concatenated years: no rows lost.
            #(This used to be two bare comparisons that checked nothing.)
            assert concat.shape[0] == sum(feats.shape[0] for feats in yearly)
            self.gdelt_feats = concat
        else:
            self.gdelt_feats = self.gdelt_feats[0]
        logging.debug('GDELT Shape: %s', self.gdelt_feats.shape)
        #Show some data - each stock needs one price per GDELT row
        for key, prices in self.all_stock_prices.items():
            logging.debug('Stock Shapes: %s, %s', key, prices.shape)
            assert prices.shape[0] == self.gdelt_feats.shape[0]

        #Firstly concat the gdelt and stocks - one extra column per stock
        stock_columns = [
            prices.reshape(prices.shape[0], 1)
            for prices in self.all_stock_prices.values()
        ]
        self.final_feats = np.concatenate([self.gdelt_feats] + stock_columns,
                                          axis=1)
        assert self.final_feats.shape[1] == len(
            self.all_stock_prices) + self.gdelt_feats.shape[1]
        logging.debug('Concat features shape: %s, %s', self.final_feats.shape,
                      len(self.all_stock_prices))

        #Create the windows - remember we wont have labels for all of these necessarily
        #because the labels are calculated from future windows
        self.window_feats = self.window_features(self.final_feats,
                                                 backward_window_size)
        logging.debug('Window features shape: %s', self.window_feats.shape)

        #Generate signal features for these windows
        #So we go from (windows, days, feats) -> (windows, feats*num signals per feat)
        signal_feats = []
        num_windows = self.window_feats.shape[0]
        logging.info("Doing Windows...")
        for idx, window in enumerate(self.window_feats):
            if idx % 100 == 0:
                #Logged value is the fraction of windows done so far
                logging.info('Done Windows (perc): %s', idx / num_windows)
            window_sig_feats = []
            for feat in range(window.shape[1]):
                #Single feature over the days of this window
                assert window[:, feat].shape[0] == backward_window_size
                window_sig_feats += self.gen_signal_for_window(window[:, feat])
            signal_feats.append(window_sig_feats)
        signal_feats = np.array(signal_feats)
        assert signal_feats.shape[0] == self.window_feats.shape[0]

        logging.debug('Window signal features shape: %s', signal_feats.shape)

        #NEXT: define what these windows will predict - using look forward window
        #KeyError here means `predict` is not one of the four loaded stocks
        labels = self.window_labels(self.all_stock_prices[predict],
                                    forward_window_size, backward_window_size)
        logging.debug(labels.shape)

        #Finally we need to trim the features and labels to ensure they match
        #Trailing windows have no label and are dropped
        signal_feats = signal_feats[0:labels.shape[0]]
        assert signal_feats.shape[0] == labels.shape[0]
        logging.info('Final Shapes: %s, %s', signal_feats.shape, labels.shape)
        return signal_feats, labels
Esempio n. 5
0
class StockEnv():
    '''
    Loads metal prices and GDELT features, reduces each backward looking day
    window to summary signals and labels it with the direction of the price
    over the following window. Also builds the keras model for those features.
    '''
    def __init__(self, ):
        #Processors - created in load_data
        self.stock_proc = None
        self.gdelt_proc = None
        self.states = None
        #Per stock prices and labels
        self.silver_prices = None
        self.gold_prices = None
        self.platinum_prices = None
        self.palladium_prices = None
        self.silver_labels = None
        self.gold_labels = None
        self.platinum_labels = None
        self.palladium_labels = None
        self.all_stock_prices = None
        self.all_stock_labels = None
        #Features
        self.gdelt_feats = None
        self.all_feats = None
        #GDELT feats with one column per stock appended - set by load_data
        self.final_feats = None
        #Backward looking windows over final_feats - set by load_data
        self.window_feats = None
        self.final_feat_windows = None
        self.predictor_price_windows = None
        #Labels and model
        self.labels = None
        self.model = None
        self.labels_1hot = None
        self.labels_unique = None

    def build_model(self, input_dim, num_classes):
        '''
        Build the keras model

        ::param input_dim int number of input features per sample
        ::param num_classes currently unused - the output layer is a single
                sigmoid unit trained with binary crossentropy
        ::return the compiled model (also stored on self.model)
        '''
        self.model = Sequential()
        self.model.add(Dense(64, input_dim=input_dim, activation='relu'))
        self.model.add(Dropout(0.5))
        self.model.add(Dense(1, activation='sigmoid'))
        self.model.compile(loss='binary_crossentropy',
                           optimizer='rmsprop',
                           metrics=['accuracy'])
        #Alternative architectures kept for reference:
        #self.model = Sequential()
        #self.model.add(Conv1D(filters=128, kernel_size=1, input_shape=input_shape, activation='relu'))
        #self.model.add(Flatten())
        #self.model.add(Dropout(0.4))
        #self.model.add(Dense(1024, activation='relu'))
        #self.model.add(Dense(num_classes, activation='softmax'))
        #
        #self.model = Sequential()
        #self.model.add(Conv1D(64, 1, activation='relu', input_shape=input_shape))
        #self.model.add(Conv1D(64, 1, activation='relu'))
        #self.model.add(MaxPooling1D(1))
        #self.model.add(Conv1D(128, 1, activation='relu'))
        #self.model.add(Conv1D(128, 1, activation='relu'))
        #self.model.add(GlobalAveragePooling1D())
        #self.model.add(Dropout(0.5))
        #self.model.add(Dense(num_classes, activation='softmax'))

        return self.model

    def load_data(self,
                  predict='silver',
                  add_stock=None,
                  years=None,
                  forward_window_size=5,
                  backward_window_size=5):
        '''
        Loads the stock and GDELT features and turns them into per window
        signal features with one label per window

        ::param predict str name of the stock the labels are generated from
        ::param add_stock List ['gold', 'platinum'] etc
                NOTE: currently not consulted - every loaded stock is appended
                to the features
        ::param years List of years to load (None means [2016, 2017])
        ::forward_window_size int number of days to look forward to produce labels
        ::backward_window_size int number of day to look back for producing data to predict the look forward
        ::return (signal_feats, labels) arrays with the same number of rows
        '''
        #None sentinel replaces the old list default so that a single list
        #object is never shared between calls
        if years is None:
            years = [2016, 2017]

        #Load up Stocks - for features
        self.stock_proc = StockProcessor()
        self.silver_prices, _, _ = self.stock_proc.gen_stock_data(
            years, 'silver', plot=False, norm=True, remove_trend=True)
        self.gold_prices, _, _ = self.stock_proc.gen_stock_data(
            years, 'gold', plot=False, norm=True, remove_trend=True)
        self.platinum_prices, _, _ = self.stock_proc.gen_stock_data(
            years, 'platinum', plot=False, norm=True, remove_trend=True)
        self.palladium_prices, _, _ = self.stock_proc.gen_stock_data(
            years, 'palladium', plot=False, norm=True, remove_trend=True)
        logging.info('Loaded Stock')
        #These will be added to the GDELT feats
        self.all_stock_prices = {
            'silver': self.silver_prices,
            'gold': self.gold_prices,
            'platinum': self.platinum_prices,
            'palladium': self.palladium_prices
        }

        #Load up GDELT features
        self.gdelt_proc = GdeltProcessor()
        self.gdelt_feats = self.gdelt_proc.load_features(years)
        #Concatenate all GDELT years
        if len(years) > 1:
            yearly = [self.gdelt_feats[idx] for idx in range(len(years))]
            concat = np.concatenate(yearly, axis=0)
            #Assert we have correctly concatenated years: no rows lost.
            #(This used to be two bare comparisons that checked nothing.)
            assert concat.shape[0] == sum(feats.shape[0] for feats in yearly)
            self.gdelt_feats = concat
        else:
            self.gdelt_feats = self.gdelt_feats[0]
        logging.debug('GDELT Shape: %s', self.gdelt_feats.shape)
        #Show some data - each stock needs one price per GDELT row
        for key, prices in self.all_stock_prices.items():
            logging.debug('Stock Shapes: %s, %s', key, prices.shape)
            assert prices.shape[0] == self.gdelt_feats.shape[0]

        #Firstly concat the gdelt and stocks - one extra column per stock
        stock_columns = [
            prices.reshape(prices.shape[0], 1)
            for prices in self.all_stock_prices.values()
        ]
        self.final_feats = np.concatenate([self.gdelt_feats] + stock_columns,
                                          axis=1)
        assert self.final_feats.shape[1] == len(
            self.all_stock_prices) + self.gdelt_feats.shape[1]
        logging.debug('Concat features shape: %s, %s', self.final_feats.shape,
                      len(self.all_stock_prices))

        #Create the windows - remember we wont have labels for all of these necessarily
        #because the labels are calculated from future windows
        self.window_feats = self.window_features(self.final_feats,
                                                 backward_window_size)
        logging.debug('Window features shape: %s', self.window_feats.shape)

        #Generate signal features for these windows
        #So we go from (windows, days, feats) -> (windows, feats*num signals per feat)
        signal_feats = []
        num_windows = self.window_feats.shape[0]
        logging.info("Doing Windows...")
        for idx, window in enumerate(self.window_feats):
            if idx % 100 == 0:
                #Logged value is the fraction of windows done so far
                logging.info('Done Windows (perc): %s', idx / num_windows)
            window_sig_feats = []
            for feat in range(window.shape[1]):
                #Single feature over the days of this window
                assert window[:, feat].shape[0] == backward_window_size
                window_sig_feats += self.gen_signal_for_window(window[:, feat])
            signal_feats.append(window_sig_feats)
        signal_feats = np.array(signal_feats)
        assert signal_feats.shape[0] == self.window_feats.shape[0]

        logging.debug('Window signal features shape: %s', signal_feats.shape)

        #NEXT: define what these windows will predict - using look forward window
        #KeyError here means `predict` is not one of the four loaded stocks
        labels = self.window_labels(self.all_stock_prices[predict],
                                    forward_window_size, backward_window_size)
        logging.debug(labels.shape)

        #Finally we need to trim the features and labels to ensure they match
        #Trailing windows have no label and are dropped
        signal_feats = signal_feats[0:labels.shape[0]]
        assert signal_feats.shape[0] == labels.shape[0]
        logging.info('Final Shapes: %s, %s', signal_feats.shape, labels.shape)
        #Logged here, once loading has finished. It previously sat in the
        #class body and so fired when the class was defined instead.
        logging.info('Completed Data Load')
        return signal_feats, labels

    def window_labels(self, stock, forward_window_size, backward_window_size):
        '''
        Generate labels for the given stock

        ::param stock 1D array of prices, one per day
        ::param forward_window_size int number of days in each label window
        ::param backward_window_size int size of the feature windows; the first
                label window starts at index backward_window_size + 1
        ::return array with one label per label window that fits inside stock
        '''
        #NOTE(review): the first feature window covers rows
        #0..backward_window_size-1, so starting labels at
        #backward_window_size + 1 skips one day - confirm this is intended
        first_start = backward_window_size + 1
        last_start = stock.shape[0] - forward_window_size
        #Slide the label window by 1 until it would run off the end
        return np.array([
            self.gen_window_label(stock[start:start + forward_window_size])
            for start in range(first_start, last_start + 1)
        ])

    def gen_window_label(self, stock_window):
        '''
        Generate a label from the given window data

        ::param stock_window non empty sequence of prices for the window
        ::return 1 when the last price is above the first, otherwise 0
                 (so a one day window is always labelled 0)
        '''
        #Initially just trend down (0) or up (1)
        return 1 if stock_window[-1] > stock_window[0] else 0

    def window_features(self, features, window_size):
        '''
        Change day features into windows of given size.
        Each window contains all the feature rows for that window

        ::param features array with one row per day
        ::param window_size int number of consecutive rows per window
        ::return array of windows, window i holding rows i..i+window_size-1.
                 Empty when there are not more rows than window_size.
        '''
        #One window per start row that leaves at least one row after the window,
        #so the window ending on the very last row is not produced
        num_windows = max(features.shape[0] - window_size, 0)
        return np.array([
            features[start:start + window_size]
            for start in range(num_windows)
        ])

    def gen_signal_for_window(self, window_data):
        '''
        Generate a set of signals for a given window and single country / input dimension

        ::param window_data 1D array holding one feature over the window
        ::return list [min, max, mean, std] of window_data
        '''
        return [
            np.min(window_data),
            np.max(window_data),
            np.mean(window_data),
            np.std(window_data),
        ]
Esempio n. 6
0
                                          remove_trend=True)
    platinum_prices, days = sp.gen_stock_data(years,
                                              'platinum',
                                              plot=False,
                                              norm=True,
                                              remove_trend=True)
    palladium_prices, days = sp.gen_stock_data(years,
                                               'palladium',
                                               plot=False,
                                               norm=True,
                                               remove_trend=True)
    #These will be added to the GDELT feats
    all_feature_prices = [silver_prices]

    #Init GDELT
    gp = GdeltProcessor()
    gdelt_feats = gp.load_features(years)
    #Concatenate all GDELT year
    gdelt_feats = np.concatenate(
        (gdelt_feats[0][:, :270], gdelt_feats[1][:, :270]), axis=0)
    logging.debug('GDELT Before Prices: %s', gdelt_feats.shape)

    #Add the stock prices to the GDELT Features
    for stock in all_feature_prices:
        stock = stock.reshape(stock.shape[0], 1)

        logging.debug('GDELT and Prices %s, %s', gdelt_feats.shape,
                      stock.shape)
        gdelt_feats = np.concatenate((gdelt_feats, stock), axis=1)
        logging.debug('GDELT After Prices: %s', gdelt_feats.shape)
Esempio n. 7
0
class StockEnv():
    '''
    State will be GDELT feats and stocks on a given day
    Action will be buy or sell (1 unit)
    Reward will be price difference between today and next day
    Next state is the GDELT feats and stock for the next day
    '''
    def __init__(self, ):
        self.stock_proc = None
        self.gdelt_proc = None
        self.states = None
        self.silver_prices = None
        self.gold_prices = None
        self.platinum_prices = None
        self.palladium_prices = None
        self.all_stock_prices = None
        #Prices of the stock we are predicting
        self.stock_to_predict = None
        #Number of state dimensions
        self.state_dims = None
        #Last timestep processed
        #So when an action is called the reward is calculated based on this+1 day
        self.last_state_idx = 0
        #Stock overall holdings
        self.stock_holdings = 0
        self.total_stock_value = 0.0
        #Start with 100 to invest
        self.total_start_investment = 1000.0
        self.total_bank_value = self.total_start_investment
        self.max_bank_value_change = 0.0
        self.max_reward = 0.0
        self.last_stock_price = 0.0
        self.final_banks = []
        #Window being used for states - days
        self.state_window = 7

    def load_data(self, predict='silver', add_stock=None, years=None):
        '''
        Loads the stock and GDELT features

        Prices for silver, gold, platinum and palladium are generated with a
        StockProcessor, the GDELT features for the requested years are loaded
        with a GdeltProcessor and stacked along axis 0 into self.states, and
        the prices of every stock named in add_stock are appended to
        self.states as one extra column each.

        ::param predict Key of the stock whose prices become
            self.stock_to_predict ('silver', 'gold', 'platinum', 'palladium')
        ::param add_stock List ['gold', 'platinum'] etc, defaults to no
            additional stock
        ::param years List of years to load, defaults to [2016, 2017]
        ::raises KeyError if predict or an add_stock entry is not a known stock
        '''
        # Build fresh lists per call so the defaults are never shared with
        # (or mutated by) the processors they are handed to
        add_stock = [] if add_stock is None else add_stock
        years = [2016, 2017] if years is None else years

        #Load up Stocks
        self.stock_proc = StockProcessor()
        #Silver is loaded without normalisation or trend removal
        self.silver_prices, _ = self.stock_proc.gen_stock_data(
            years, 'silver', plot=False, norm=False, remove_trend=False)
        #The remaining metals are normalised and de-trended
        self.gold_prices, _ = self.stock_proc.gen_stock_data(
            years, 'gold', plot=False, norm=True, remove_trend=True)
        self.platinum_prices, _ = self.stock_proc.gen_stock_data(
            years, 'platinum', plot=False, norm=True, remove_trend=True)
        self.palladium_prices, _ = self.stock_proc.gen_stock_data(
            years, 'palladium', plot=False, norm=True, remove_trend=True)
        logging.info('Loaded Stock')
        #These will be added to the GDELT feats
        self.all_stock_prices = {
            'silver': self.silver_prices,
            'gold': self.gold_prices,
            'platinum': self.platinum_prices,
            'palladium': self.palladium_prices
        }

        self.stock_to_predict = self.all_stock_prices[predict]

        #Init GDELT
        self.gdelt_proc = GdeltProcessor()
        self.states = self.gdelt_proc.load_features(years)
        #Concatenate all GDELT years, however many were requested
        #NOTE(review): assumes load_features returns one 2-D array per year
        #with matching column counts - confirm against GdeltProcessor
        if len(years) > 1:
            self.states = np.concatenate(self.states, axis=0)
        else:
            self.states = self.states[0]
        logging.debug('GDELT Shape Before Prices: %s', self.states.shape)

        for stock_name in add_stock:
            #Add the stock prices to the GDELT Features as a new column
            stock = self.all_stock_prices[stock_name]
            stock = stock.reshape(stock.shape[0], 1)
            logging.debug('GDELT and Prices %s, %s', self.states.shape,
                          stock.shape)
            self.states = np.concatenate((self.states, stock), axis=1)
            logging.debug('GDELT After Prices: %s', self.states.shape)

        #Logged once the load has actually finished, not at class definition
        logging.info('Completed Data Load')

    def reset(self):
        '''
        Reset the environment back to the first day
        '''
        self.last_state_idx = 0
        #Reset holdings
        self.stock_holdings = 0
        self.total_stock_value = 0.0
        #Start with 100 to invest
        self.total_bank_value = self.total_start_investment
        self.max_bank_value_change = 0.0
        return self.get_state(0)

    def stock_value(self, price):
        '''
        Get the current holding stock value at given price
        '''
        return self.stock_holdings * price

    def gen_reward(self, action):
        '''
        Calculate the reward
        So the aim is to maximise the total I have in my ban - 
            i.e. transfer funds from stock market to my bank by buying and selling
        I want to learn to:
            sell when high changing to low (get rewarded for this)
            buy when low changing to high ()
            hold when nothing changing

        0=buy - reward = 0.0
        1=sell - reward = normed change in bank value
        2=hold - reward = 0.0
        We can retrend because the process steps through the year by day
        '''
        #Buy
        if action == 0:
            self.stock_holdings += 1
            #Pay for buying the stock at TODAYS price
            self.total_bank_value -= self.stock_to_predict[self.last_state_idx]
            #Penalise zero bank value
            #if self.total_bank_value < 0.0:
            #    self.total_bank_value = 0.0
            #    reward = -100.0
            #else:
            #Get the actual stock price TOMORROW
            new_stock_price = self.stock_to_predict[self.last_state_idx + 1]
            self.last_stock_price = new_stock_price
            self.total_stock_value = new_stock_price * self.stock_holdings
            #logging.debug('Buy - New Stock Price: %s, total value: %s, bank: %s', new_stock_price, self.total_stock_value, self.total_bank_value)
            # Reward for buying low - tomorrow price went up
            reward = new_stock_price - self.stock_to_predict[
                self.last_state_idx]

            #if reward > self.max_reward:
            #    self.max_reward = reward
            #reward = reward/self.max_reward
            #if reward < 0.0:
            #    reward =  0.0

        #Sell
        if action == 1:
            self.stock_holdings -= 1
            #Penalise zero stock holdings
            #if self.stock_holdings < 0:
            #    self.stock_holdings = 0
            #    reward = -100.0
            #else:
            #Get the actual stock price back so we can get overall stock value
            new_stock_price = self.stock_to_predict[self.last_state_idx + 1]
            self.last_stock_price = new_stock_price
            #Get the new bank value tomorrow (if I were to pull out now)
            new_bank_value = self.stock_holdings * new_stock_price
            #Reward = change in potential bank value
            #bank_value_change = (new_bank_value - self.total_bank_value)

            #Reward for selling high
            reward = self.stock_to_predict[
                self.last_state_idx] - new_stock_price

            #if reward > self.max_reward:
            #    self.max_reward = reward
            #reward = reward/self.max_reward
            #if reward < 0.0:
            #    reward = 0.0

            self.total_bank_value = new_bank_value
            self.total_stock_value = new_stock_price * self.stock_holdings
            #logging.debug('Sell - New Stock Price: %s, total value: %s, bank: %s', new_stock_price, self.total_stock_value, self.total_bank_value)
        #Hold
        if action == 2:
            #No stock
            if self.stock_holdings == 0:
                return 0.0
            #Get the actual stock price back so we can get overall stock value
            new_stock_price = self.stock_to_predict[self.last_state_idx + 1]
            #Get the new bank value tomorrow (if I were to pull out now)
            new_bank_value = self.stock_holdings * new_stock_price
            reward = self.stock_to_predict[
                self.last_state_idx] - new_stock_price
            #if reward < 0.0:
            #    reward = 0.0
            self.total_bank_value = new_bank_value
            self.total_stock_value = new_stock_price * self.stock_holdings
            #logging.debug('Hold - total value: %s, total bank: %s', self.total_stock_value, self.total_bank_value)
        return reward

    def get_state(self, state_index):
        '''
        Get the next state - based on the current window being used
        '''
        if self.state_window == 1:
            #One day
            return self.states[state_index]
        else:
            #Get a window and return its average
            return self.states[self.last_state_idx:self.last_state_idx +
                               self.state_window].flatten()

    def step(self, action):
        '''
        Calculate the output state, reward and done value for the given action
        '''
        #Price difference - today vs tmr on the one we are predicting
        reward = self.gen_reward(action)
        #Advance a day
        self.last_state_idx += 1
        new_state = self.get_state(self.last_state_idx)
        #Break if windowing and we can fit another window
        if new_state.shape[0] != self.states.shape[1] * self.state_window:
            done = True
        else:
            done = False
        '''
        if self.last_state_idx == self.stock_to_predict.shape[0]-1:
            done = True
        else:
            done = False
        '''
        return new_state, reward, done, None