def crawl(self):
    """Download the K-line rows for ``self.code`` and persist each of them.

    Calls ``ts.get_k_data`` for the ``self.start`` .. ``self.end`` range with
    ``retry_count=30``, turns every returned row into a ``Stock`` and calls
    ``save_if_need()`` on it, then logs the number of rows in the frame.
    """
    frame = ts.get_k_data(
        code=self.code,
        start=self.start,
        end=self.end,
        retry_count=30,
    )
    for label in frame.index:
        # One row -> plain dict -> keyword arguments of the Stock constructor.
        fields = frame.loc[label].to_dict()
        Stock(**fields).save_if_need()
    row_count = frame.shape[0]
    logging.warning(
        "Finish crawling code: {}, items count: {}".format(self.code, row_count))
def crawl(self):
    """Fetch K-line data for ``self.code`` and save every row as a ``Stock``.

    The data comes from ``ts.get_k_data`` (range ``self.start`` ..
    ``self.end``, ``retry_count=30``). A summary line with the code and the
    row count is logged once all rows have been processed.
    """
    stock_frame = ts.get_k_data(code=self.code, start=self.start,
                                end=self.end, retry_count=30)
    total = stock_frame.shape[0]
    # Original author's note: the index labels are integers counting up from
    # 0 to the length of the frame -- TODO confirm against ts.get_k_data.
    for row_label in stock_frame.index:
        # The data of a single row.
        row = stock_frame.loc[row_label]
        # pandas' dict conversion; example given by the original author:
        # {"date": "2018-08-01", "open": 5234, "close": 5272, "high": 5298,
        #  "volume": 7665600, "code": "sh"}
        row_dict = row.to_dict()
        # The dict keys are passed as keyword arguments, so they have to
        # match the fields accepted by Stock.
        stock = Stock(**row_dict)
        # Presumably stores the row in the database -- see Stock.save_if_need.
        stock.save_if_need()
    logging.warning("Finish crawling code: {}, items count: {}".format(
        self.code, total))
def _init_stock_frames_data(self, start_date, end_date):
    """Build the original and min-max scaled frames for every code.

    For each code in ``self.codes`` the rows returned by
    ``Stock.get_k_data(code, start_date, end_date)`` are converted with
    ``to_dic()``; element 1 of each converted row is used as the index label
    (the date) and elements 2 onward as the values of the five price/volume
    columns. Afterwards ``self.dates`` holds the sorted union of all dates
    and every frame is re-indexed onto it with back-filling.

    Args:
        start_date: Lower bound passed through to ``Stock.get_k_data``.
        end_date: Upper bound passed through to ``Stock.get_k_data``.
    """
    # Invalid codes are dropped before anything is loaded.
    self._remove_invalid_codes()

    columns = ['open', 'high', 'low', 'close', 'volume']
    dates_set = set()

    for code in self.codes:
        records = [doc.to_dic() for doc in Stock.get_k_data(code, start_date, end_date)]
        stocks_date = [record[1] for record in records]
        stocks_data = [record[2:] for record in records]
        dates_set.update(stocks_date)

        # A fresh scaler per code: each code is scaled on its own value range.
        stocks_scaled = preprocessing.MinMaxScaler().fit_transform(stocks_data)
        origin_frame = pd.DataFrame(data=stocks_data, index=stocks_date, columns=columns)
        scaled_frame = pd.DataFrame(data=stocks_scaled, index=stocks_date, columns=columns)
        self.origin_stock_frames[code] = origin_frame
        self.scaled_stock_frames[code] = scaled_frame

    # Shared, ordered date axis across all codes.
    self.dates = sorted(dates_set)

    # Align every frame on the shared axis, back-filling the gaps.
    for code in self.codes:
        origin_frame, scaled_frame = self.origin_stock_frames[code], self.scaled_stock_frames[code]
        self.origin_stock_frames[code] = origin_frame.reindex(self.dates, method='bfill')
        self.scaled_stock_frames[code] = scaled_frame.reindex(self.dates, method='bfill')
def _remove_invalid_codes(self):
    """Keep only the codes for which ``Stock.exist_in_db`` returns a truthy value.

    Raises:
        ValueError: If ``self.codes`` is empty, or if none of the codes
            passes the ``Stock.exist_in_db`` check.
    """
    if len(self.codes) == 0:
        raise ValueError("Initialize, codes cannot be empty.")

    known_codes = []
    for code in self.codes:
        if Stock.exist_in_db(code):
            known_codes.append(code)

    if len(known_codes) == 0:
        raise ValueError("Fatal Error: No valid codes or empty codes.")
    self.codes = known_codes
def _remove_invalid_codes(self):
    """Replace ``self.codes`` with the subset that ``Stock.exist_in_db`` accepts.

    Raises:
        ValueError: If ``self.codes`` is empty, or if no code passes the
            ``Stock.exist_in_db`` check.
    """
    if not len(self.codes):
        # Message typo fixed: it previously read "odes cannot be empty".
        raise ValueError("Fatal error, codes cannot be empty.")

    valid_codes = [code for code in self.codes if Stock.exist_in_db(code)]
    if not len(valid_codes):
        raise ValueError("Fatal error, no valid codes in database.")

    self.codes = valid_codes
def _data_fetch(self):
    """Load per-code price frames and align them on one shared date index.

    For each code in ``self.state_codes`` the rows returned by
    ``Stock.get_k_data(code, self._start_date, self._end_date)`` are
    converted with ``to_dic()``; element 1 of each converted row is used as
    the index label (the date) and elements 2 onward as the values of the
    five price/volume columns. The raw and scaled frames are stored in
    ``self.origin_frames`` / ``self.scaled_frames`` under the code,
    ``self.dates`` is set to the sorted union of all dates, and every frame
    is re-indexed onto it with back-filling.

    Assumes ``self.origin_frames`` and ``self.scaled_frames`` are mutable
    mappings that already exist when this method is called -- the re-index
    pass has always required that.
    """
    # Remove invalid codes first.
    self._validate_codes()

    columns = ['open', 'high', 'low', 'close', 'volume']
    dates_set = set()

    # Load data.
    for code in self.state_codes:
        instrument_docs = Stock.get_k_data(code, self._start_date, self._end_date)
        instrument_dicts = [instrument.to_dic() for instrument in instrument_docs]

        dates = [instrument[1] for instrument in instrument_dicts]
        instruments = [instrument[2:] for instrument in instrument_dicts]
        dates_set.update(dates)

        # NOTE: self._scaler is one shared object that is refit for every
        # code, so after the loop it holds the fit of the last code only.
        scaler = self._scaler
        scaler.fit(instruments)
        instruments_scaled = scaler.transform(instruments)

        origin_frame = pd.DataFrame(data=instruments, index=dates, columns=columns)
        scaled_frame = pd.DataFrame(data=instruments_scaled, index=dates, columns=columns)

        # Build the code -> frame maps. The frames must be keyed by code:
        # the re-index pass below looks each one up by code, which a single
        # attribute overwritten on every iteration cannot serve.
        self.origin_frames[code] = origin_frame
        self.scaled_frames[code] = scaled_frame

    # Shared, ordered date axis across all codes.
    self.dates = sorted(dates_set)

    # Rebuild index: align every frame on the shared axis with back-fill.
    for code in self.state_codes:
        origin_frame = self.origin_frames[code]
        scaled_frame = self.scaled_frames[code]
        self.origin_frames[code] = origin_frame.reindex(self.dates, method='bfill')
        self.scaled_frames[code] = scaled_frame.reindex(self.dates, method='bfill')
def generate_sample_data():
    """Save synthetic daily rows for the two test codes "T9999" and "T9998".

    Both codes cover 2008-01-01 through 2008-01-30. For "T9999" the values
    grow with the date; for "T9998" the dates are walked in reverse, so the
    same values shrink as the date advances. Every row is written through
    ``Stock.save_if_need()``.
    """
    dates = pd.date_range(start="2008-01-01", end="2008-01-30")
    # (code, date order) pairs; all "T9999" rows are saved before "T9998".
    plans = (("T9999", dates), ("T9998", dates[::-1]))
    for code, ordered_dates in plans:
        for position, day in enumerate(ordered_dates):
            stock = Stock()
            stock.code = code
            stock.date = day
            stock.open = position
            stock.high = position + 1
            stock.low = position - 0.5
            stock.close = position + 1
            stock.volume = 100
            stock.save_if_need()
class LSTMAlgorithm(BaseAlgorithm):
    """LSTM followed by a per-timestep ``Dense(1)`` head, trained with MSE/Adam.

    ``self.model`` and ``self.params`` are not set in this class; presumably
    ``BaseAlgorithm.__init__`` provides them -- TODO confirm.
    """

    def __init__(self, params):
        """Forward ``params`` to ``BaseAlgorithm``.

        Args:
            params: Mapping read via ``params.get('input_dim')`` in
                ``init_model``.
        """
        super(LSTMAlgorithm, self).__init__(params)

    def init_model(self):
        """Add the layers to ``self.model``, compile it, print its summary.

        Returns:
            The compiled ``self.model``.
        """
        self.model.add(LSTM(10,
                            input_dim=self.params.get('input_dim'),
                            return_sequences=True))
        # return_sequences=True above keeps one output per timestep, which
        # TimeDistributed(Dense(1)) then maps to a single value each.
        self.model.add(TimeDistributed(Dense(1)))
        self.model.compile(loss="mse", optimizer="adam")
        self.model.summary()
        return self.model

    def run(self, x, y, epochs=1000, batch_size=128):
        """Fit ``self.model`` on ``x`` / ``y``.

        Args:
            x: Training inputs, passed straight to ``model.fit``.
            y: Training targets, passed straight to ``model.fit``.
            epochs: Number of training epochs (default 1000, the value that
                used to be hard-coded).
            batch_size: Batch size (default 128, the value that used to be
                hard-coded).

        Returns:
            Whatever ``self.model.fit`` returns.
        """
        return self.model.fit(x, y, epochs=epochs, batch_size=batch_size)


def to_scale(data):
    """Fit a fresh ``MinMaxScaler`` on ``data`` and return the scaled data."""
    return MinMaxScaler().fit_transform(data)


if __name__ == '__main__':
    doc_class = Stock()
    data_docs = doc_class.get_k_data('600036', '2017-01-01', '2018-01-01')
    data_dicts = [row.to_dict() for row in data_docs]
    # NOTE(review): row[2:] slices each converted row, so to_dict() has to
    # return a sequence rather than a mapping here -- TODO confirm.
    data = [row[2:] for row in data_dicts]
    x = to_scale(data)
    # NOTE(review): this takes row 4 of the scaled data and drops its first
    # two entries; that looks unintended for a training target -- confirm.
    y = x[4][2:]
    # The algorithm is only constructed here; init_model()/run() are not
    # called in this script.
    algorithm = LSTMAlgorithm({'input_dim': 5})