def _init_stock_frames_data(self, start_date, end_date):
    """Fetch K-line data for each code and build origin/scaled DataFrames.

    For every code in ``self.codes`` this pulls rows from ``Stock.get_k_data``,
    splits each row into its date (field 1) and OHLCV values (fields 2..),
    builds one raw and one min-max-scaled DataFrame per code, then reindexes
    every frame onto the sorted union of all trading dates (back-filling gaps).

    :param start_date: first date passed through to ``Stock.get_k_data``.
    :param end_date: last date passed through to ``Stock.get_k_data``.
    """
    # Drop unresolvable codes before fetching anything.
    self._remove_invalid_codes()
    columns = ['open', 'high', 'low', 'close', 'volume']
    all_dates = set()
    for code in self.codes:
        # One sequence-like record per trading day for this code.
        rows = [record.to_dic() for record in Stock.get_k_data(code, start_date, end_date)]
        # Field 1 is the date; fields 2.. are the OHLCV values.
        row_dates = [row[1] for row in rows]
        row_values = [row[2:] for row in rows]
        all_dates.update(row_dates)
        # A fresh scaler per code: each code is normalized independently.
        scaled_values = preprocessing.MinMaxScaler().fit_transform(row_values)
        self.origin_stock_frames[code] = pd.DataFrame(data=row_values, index=row_dates, columns=columns)
        self.scaled_stock_frames[code] = pd.DataFrame(data=scaled_values, index=row_dates, columns=columns)
    # Shared, ordered calendar: the union of every code's trading dates.
    self.dates = sorted(list(all_dates))
    # Align every frame onto the shared calendar, back-filling missing dates.
    for code in self.codes:
        self.origin_stock_frames[code] = self.origin_stock_frames[code].reindex(self.dates, method='bfill')
        self.scaled_stock_frames[code] = self.scaled_stock_frames[code].reindex(self.dates, method='bfill')
def _data_fetch(self):
    """Load K-line data per state code, build origin/scaled frames, and
    align all frames on the union of trading dates.

    Bug fixed: per-code frames were stored on scalar attributes
    ``self._origin_frame`` / ``self._scaled_frames`` (each iteration
    overwriting the last), yet the rebuild loop below reads the code-keyed
    maps ``self.origin_frames[code]`` / ``self.scaled_frames[code]`` — so
    the lookup failed. Frames are now stored in the code-keyed maps.
    Also dropped the unused ``enumerate`` index.
    """
    # Remove invalid codes first.
    self._validate_codes()
    # Init columns and data set.
    columns, dates_set = ['open', 'high', 'low', 'close', 'volume'], set()
    # Load data.
    for code in self.state_codes:
        # Load instrument docs by code.
        instrument_docs = Stock.get_k_data(code, self._start_date, self._end_date)
        # Init instrument dicts (sequence-like: field 1 is the date, 2.. OHLCV).
        instrument_dicts = [instrument.to_dic() for instrument in instrument_docs]
        # Split dates.
        dates = [instrument[1] for instrument in instrument_dicts]
        # Split instruments.
        instruments = [instrument[2:] for instrument in instrument_dicts]
        # Update dates set.
        dates_set = dates_set.union(dates)
        # Build origin and scaled frames. NOTE: the shared scaler is refit
        # per code, so each code is normalized independently.
        scaler = self._scaler
        scaler.fit(instruments)
        instruments_scaled = scaler.transform(instruments)
        origin_frame = pd.DataFrame(data=instruments, index=dates, columns=columns)
        scaled_frame = pd.DataFrame(data=instruments_scaled, index=dates, columns=columns)
        # Build code -> frame map (fixed: keyed by code, not a single attribute).
        self.origin_frames[code] = origin_frame
        self.scaled_frames[code] = scaled_frame
    # Init date iter: ordered union of every code's trading dates.
    self.dates = sorted(list(dates_set))
    # Rebuild index: align every frame on the shared calendar, back-filling gaps.
    for code in self.state_codes:
        origin_frame = self.origin_frames[code]
        scaled_frame = self.scaled_frames[code]
        self.origin_frames[code] = origin_frame.reindex(self.dates, method='bfill')
        self.scaled_frames[code] = scaled_frame.reindex(self.dates, method='bfill')
class LSTMAlgorithm(BaseAlgorithm):
    """Single-layer LSTM regressor built on the Keras model supplied by
    BaseAlgorithm."""

    def __init__(self, params):
        # params: plain dict of hyper-parameters; 'input_dim' is read in init_model.
        super(LSTMAlgorithm, self).__init__(params)

    def init_model(self):
        """Build and compile the model: LSTM(10) -> per-timestep Dense(1), MSE/Adam.

        NOTE(review): ``input_dim=`` on LSTM is a legacy Keras keyword —
        confirm the installed Keras version still accepts it.
        """
        self.model.add(LSTM(10, input_dim=self.params.get('input_dim'), return_sequences=True))
        self.model.add(TimeDistributed(Dense(1)))
        self.model.compile(loss="mse", optimizer="adam")
        self.model.summary()
        return self.model

    def run(self, x, y):
        """Fit the model on (x, y) and return the Keras History object."""
        return self.model.fit(x, y, epochs=1000, batch_size=128)


def to_scale(data):
    """Min-max scale ``data`` column-wise to [0, 1] and return the array."""
    scale = MinMaxScaler()
    scale.fit(data)
    return scale.transform(data)


if __name__ == '__main__':
    doc_class = Stock()
    data_docs = doc_class.get_k_data('600036', '2017-01-01', '2018-01-01')
    # Fixed: the sibling frame-building code calls `to_dic()` on these docs
    # (and slices the result), so `to_dict()` here was a typo that would
    # raise AttributeError.
    data_dicts = [row.to_dic() for row in data_docs]
    # Fields 2.. of each record are the OHLCV values.
    data = [row[2:] for row in data_dicts]
    x = to_scale(data)
    y = x[4][2:]
    algorithm = LSTMAlgorithm({'input_dim': 5})