def update(self, end_date, start_date=None, symbols=None, f=False):
    """Refresh the symbol and date indexes, then ingest and store new quotes.

    :param end_date: last date to ingest; also handed to the date index update.
    :param start_date: optional first date to ingest.
    :param symbols: symbols forwarded to ``ingest_h``.
    :param f: chooses how ``start_date`` is combined with the value returned
        by the date index update (see the inline comments).
    :return: None.
    """
    self._symbol_index.update()
    logger.info('updating quote')
    # The date index reports its latest date from before this update,
    # or None (name-based reading of the original variable; TODO confirm).
    prev_latest = self._date_index.update(end_date)

    if start_date is None:
        if prev_latest is None:
            logger.info('quote no need to update')
            return
        # No explicit start: continue right after the previous latest date.
        logger.info(f'quote updating append {prev_latest + 1} - {end_date}')
        first_date = prev_latest + 1
    else:
        if prev_latest is not None:
            if f:
                # With f set, the requested start is replaced outright.
                start_date = prev_latest + 1
                logger.info(
                    f'quote updating in situ {start_date} - {end_date}')
            else:
                # Without f, never start later than the day after the previous latest.
                start_date = min(start_date, prev_latest + 1)
                logger.info(
                    f'quote updating append {start_date} - {end_date}')
        first_date = start_date

    trade_dates = self._date_index.get_calendar(first_date, end_date)
    data = self.ingest_h(symbols=symbols, trade_dates=trade_dates)
    self.load('r+')
    if not data.empty:
        self.save(data)
def calc_single_market_with_winning(self, winning_period, market='US'):
    """Run ``calc`` for every calendar date of a window, then attach win rates.

    The window spans ``winning_period`` bars back from the current end date.
    Each date in it is calculated over its own ``self._bar_count`` look-back
    slice of the quotes. The records stored for the end date then receive a
    ``'winning'`` entry and are saved.

    :param winning_period: bar count used to derive the window start date.
    :param market: market code forwarded to the date/quote helpers.
    :return: None.
    """
    if self._symbols is None:
        self._symbols = self.get_symbols(market)

    offset = self.get_date_offset
    # The start date must be derived from the end date *before* the end date
    # itself is re-offset on the next line.
    self._start_date = offset(self._end_date,
                              bar_count=winning_period,
                              market=market)
    self._end_date = offset(self._end_date, bar_count=1, market=market)
    quote_start = offset(self._start_date,
                         bar_count=self._bar_count,
                         market=market)

    logger.info('loading quote')
    quotes = self.get_daily_hists(self._symbols,
                                  quote_start,
                                  self._end_date,
                                  market=market)

    trading_days = self.get_calendar(start_date=self._start_date,
                                     end_date=self._end_date,
                                     market=market)
    for calc_date in trading_days:
        window_start = offset(calc_date, self._bar_count, market)
        in_window = ((quotes['timestamp'] <= calc_date)
                     & (quotes['timestamp'] >= window_start))
        window_quotes = quotes[in_window]
        self._calc_date = calc_date
        self.calc(window_quotes)

    logger.info('calc winning probability')
    win_rates = self._winning_probability(self._data_store,
                                          quote_data=quotes,
                                          market=self._market)
    latest_records = self._data_store[self._end_date]
    for record in latest_records:
        record['winning'] = win_rates[record['metrics']]
    self.save(latest_records)
def calc(self, data):
    """Clean the quote frame and run every pattern calculation on it.

    Only symbols with a complete (NaN-free) series for all of open, high,
    low, close and volume are kept. The resulting arrays are handed to
    ``Classical`` and each ``_c_*`` pattern method is then invoked.

    :param data: DataFrame with ``timestamp``, ``symbol``, ``open``,
        ``high``, ``low``, ``close`` and ``volume`` columns. It is no longer
        modified in place.
    :return: None; results are produced by the ``_c_*`` methods.
    """
    logger.info('cleaning invalid data')
    # Non-inplace so the caller's frame keeps its columns.
    indexed = data.set_index(['timestamp', 'symbol'])

    fields = ('open', 'high', 'low', 'close', 'volume')
    # One wide (timestamp x symbol) frame per field, dropping any symbol
    # that has a gap in that field.
    frames = {
        name: indexed[name].unstack().dropna(axis=1, how='any')
        for name in fields
    }

    common = set(frames['open'].columns)
    for name in fields[1:]:
        common.intersection_update(frames[name].columns)
    # Follow the column order of the open frame so the symbol order is
    # deterministic instead of depending on set iteration order.
    symbols = [s for s in frames['open'].columns if s in common]

    self._data_symbols = pd.Index(symbols)
    self._datetime_index = pd.to_datetime(frames['open'].index)

    _open, _high, _low, _close, _volume = (
        frames[name].reindex(symbols, axis=1).to_numpy() for name in fields
    )

    logger.info(f'calc metrics start, open shape: {_open.shape}')
    self._calculator_instance = Classical(_open, _high, _low, _close,
                                          _volume)
    self._c_three_red_soldiers()
    self._c_three_crow()
    self._c_multi_cannon()
    self._c_morning_start()
    self._c_duck_head()
    self._c_rise_wrap_fall()
    self._c_fall_wrap_raise()
    self._c_rise_pregnant()
    self._c_golden_spider()
    self._c_dead_spider()
def calc_single_market(self, market='US'):
    """Calculate and save the metrics for the current end date only.

    :param market: market code forwarded to the symbol/date/quote helpers.
    :return: None.
    """
    if self._symbols is None:
        self._symbols = self.get_symbols(market)

    # The validated end date is also the calculation date.
    valid_end = self.get_valid_date(self._end_date)
    self._end_date = valid_end
    self._calc_date = valid_end

    first_bar_date = self.get_date_offset(valid_end,
                                          bar_count=self._bar_count,
                                          market=market)
    logger.info('loading quote')
    quotes = self.get_daily_hists(self._symbols,
                                  first_bar_date,
                                  self._end_date,
                                  market=market)
    logger.debug(quotes.tail())
    self.calc(quotes)

    results = list(self._data_store[self._end_date])
    self.save(results)
def load(self):
    """Load the contract CSV and rebuild the in-memory contract lookups.

    Reads ``self._index_path`` with every column as ``str``, then creates
    one ``StockContract`` per row. Contracts are kept both as a list and
    in a dict keyed by symbol.

    :return: None.
    """
    logger.info('loading symbol info')
    self._stock_contract_list = []
    self._symbol_contract_dict = {}
    self._contract_df = pd.read_csv(self._index_path, dtype='str')
    for i, row in self._contract_df.iterrows():
        sc = StockContract(code=row['code'],
                           symbol=row['symbol'],
                           sid=i,
                           # ``row.name`` on an iterrows() Series is the row's
                           # index label, not the ``name`` column, so the
                           # column must be read by key.
                           name=row['name'],
                           market=self._market,
                           industry=row['industry'],
                           board=row['board'],
                           area=row['area'],
                           list_date=row['list_date'])
        self._stock_contract_list.append(sc)
        self._symbol_contract_dict[row['symbol']] = sc
def ingest(self, symbols, start_date, end_date):
    """Yield daily history for ``symbols`` in chunks of 50 symbols.

    This is a generator: nothing is fetched or logged until it is iterated.

    :param symbols: sliceable sequence of symbols; ``None`` means every
        symbol of ``self._symbol_index``.
    :param start_date: first date forwarded to the ingester.
    :param end_date: last date forwarded to the ingester.
    :return: generator of the ingester's result for each chunk.
    """
    logger.info('quote ingest start')
    if symbols is None:
        symbols = self._symbol_index.symbols

    chunk_size = 50
    symbol_count = len(symbols)
    # Ceiling division: the former ``n // size + 1`` reported one chunk too
    # many whenever the symbol count was an exact multiple of the size.
    total_chunks = -(-symbol_count // chunk_size)
    for chunk_no, offset in enumerate(range(0, symbol_count, chunk_size),
                                      start=1):
        logger.info(f'ingest quote: {chunk_no}/{total_chunks}')
        yield StandardQuoteIngester.ingest_daily_hists_v(
            symbols[offset:offset + chunk_size], start_date, end_date)
def run(self,
        end_date=None,
        symbols=None,
        market='US',
        winning_period=60,
        forecast_days=(1, 3, 5),
        winning=False):
    """Entry point: calculate metrics for one market.

    :param end_date: calculation end date; when falsy, yesterday's date as
        a ``YYYYMMDD`` int is used.
    :param symbols: symbols to calculate; stored on the instance as given.
    :param market: market code. ``'ALL'`` re-dispatches to ``'CN'``.
    :param winning_period: bar count of the win-rate window, used only when
        ``winning`` is true.
    :param forecast_days: accepted for interface compatibility; this method
        does not pass it to the calculation methods.
    :param winning: when true, run the rolling calculation with win rates
        instead of the single-date calculation.
    :return: None.
    """
    logger.info(f'running with args end_date: {end_date}, symbols: {symbols}, market: {market}')
    if end_date:
        self._end_date = end_date
    else:
        yesterday = datetime.today().date() - timedelta(days=1)
        self._end_date = int(yesterday.strftime('%Y%m%d'))

    if market == 'ALL':
        # Forward every option. The earlier call passed only the first three
        # arguments, so ``winning=True`` was silently ignored for 'ALL'.
        # NOTE(review): 'ALL' currently expands to 'CN' only - confirm
        # whether other markets should be included.
        self.run(end_date,
                 symbols,
                 'CN',
                 winning_period=winning_period,
                 forecast_days=forecast_days,
                 winning=winning)
        return

    self._market = market
    self._symbols = symbols
    if winning:
        self.calc_single_market_with_winning(winning_period, market)
    else:
        self.calc_single_market(market)
def save(self, data):
    """Write quote rows into the per-field storage arrays.

    Rows are mapped to integer ids through ``self._symbol_index.i_of`` and
    ``self._date_index.i_of``, and rows with any missing value are dropped.
    Each field in ``self._fields_dict`` is then written as scaled ``int32``
    into the object returned by ``self._get_memmap_obj``.

    :param data: DataFrame with ``symbol`` and ``trade_date`` columns plus
        one column per field name. It is modified in place: id columns are
        added, incomplete rows dropped, and the index replaced.
    :return: None.
    """
    logger.info(f'saving data.shape: {data.shape}')
    data['sid'] = data.symbol.apply(self._symbol_index.i_of)
    data['did'] = data.trade_date.apply(self._date_index.i_of)
    # Lazy %-args: the frame is only rendered when debug logging is enabled.
    logger.debug('[daily_bar_util] saving ohlcv:\n %s', data)
    data.dropna(how='any', inplace=True)
    if data.empty:
        # Nothing valid is left; min()/max() below would raise on an empty
        # column.
        logger.info('no complete rows to save after dropping missing values')
        return

    data['did'] = data['did'].astype('int')
    data['sid'] = data['sid'].astype('int')
    start_id = int(data['did'].min())
    end_id = int(data['did'].max())
    # Contiguous date-id range, so gaps get padded from the previous row.
    did_calendar = list(range(start_id, end_id + 1))
    data.set_index(['did', 'sid'], inplace=True)

    for field in self._fields_dict.values():
        field_frame = data[field.name].unstack()
        field_frame = field_frame.sort_index().fillna(0)
        field_frame = field_frame.reindex(did_calendar, method='pad')
        for sid in field_frame.columns:
            mmp_obj = self._get_memmap_obj(feild_obj=field, sid=sid)
            # Round before the int32 cast: a bare cast truncates, so a scaled
            # value such as 12339.999999 would be stored one unit too low.
            scaled = (field_frame[sid] * field.precision).round()
            mmp_obj[start_id:end_id + 1, sid % self._chunks_s_num] = \
                scaled.to_numpy(dtype='int32')
def update(self):
    """Append newly listed contracts to the stored contract table.

    Reloads the stored table and ingests the current listing. Rows whose
    symbol is not yet stored are appended, then the table is saved and
    reloaded so the in-memory lookups match the file.

    :return: None.
    """
    self.load()
    logger.info('updating contract')
    latest = self.ingest()
    known_symbols = self._contract_df['symbol']
    # Filter by membership instead of ``drop(old)``: drop raises KeyError as
    # soon as one stored symbol is missing from the fresh listing.
    to_update_df = latest[~latest['symbol'].isin(known_symbols)]
    if to_update_df.empty:
        logger.info('no new contracts to update')
        return

    logger.info(f'update contracts {to_update_df["symbol"].to_list()}')
    # ``DataFrame.append`` was removed in pandas 2.0; ``concat`` is the
    # supported equivalent. The index is not written to the CSV, so
    # renumbering it is harmless.
    self._contract_df = pd.concat([self._contract_df, to_update_df],
                                  ignore_index=True)
    self.save(self._contract_df)
    self.load()
def ingest_h(self, symbols, trade_dates):
    """Fetch daily history for ``symbols`` on ``trade_dates`` in this market.

    :param symbols: symbols forwarded to the ingester.
    :param trade_dates: trade dates forwarded to the ingester.
    :return: whatever ``StandardQuoteIngester.ingest_daily_hists_h`` returns.
    """
    logger.info('quote ingest start')
    fetch = StandardQuoteIngester.ingest_daily_hists_h
    return fetch(symbols, trade_dates, self._market)
def save(self, data: pd.DataFrame):
    """Write the contract table to ``self._index_path`` as CSV, without the index.

    :param data: contract table to persist.
    :return: None.
    """
    logger.info('saving symbol info')
    target = self._index_path
    data.to_csv(target, mode='w+', index=False)
def init(self):
    """Create the contract file from a fresh ingest, then load it.

    :return: None.
    """
    logger.info('init symbol info')
    self.save(self.ingest())
    self.load()
def ingest(self):
    """Fetch the basic contract listing for this instance's market.

    :return: whatever ``StandardQuoteIngester.ingest_basic`` returns.
    """
    logger.info('ingest symbol info')
    market = self._market
    return StandardQuoteIngester.ingest_basic(market)
def _winning_probability(self,
                         forecast_data,
                         forecast_days=(1, 3, 5),
                         quote_data=None,
                         market='US'):
    """Compute the average win rate of every metric for each horizon.

    For each forecast record, the win rate at horizon ``d`` is the share of
    its symbols whose close is higher ``d`` bars later (inverted when the
    record's direction is -1). Rates are averaged over all forecast dates.

    :param forecast_data: dict mapping a date to a list of records, e.g.::

            {datetime.datetime(2019, 9, 27, 0, 0): [
                {'metrics': 'three_red_soldiers',
                 'date': datetime.datetime(2019, 9, 27, 0, 0),
                 'market': 'US',
                 'direction': 1,  # predicted future direction
                 'data': ['HGH', 'LEO', 'EE', 'BBDC', 'NCA', 'USAC', 'MHI']},
                ...],
             ...}
    :param forecast_days: horizons, in bars, to evaluate, e.g. ``(1, 3, 5)``.
    :param quote_data: optional DataFrame with ``timestamp``, ``symbol`` and
        ``close`` columns; loaded via ``get_daily_hists`` when ``None``.
        It is not modified.
    :param market: market code used when quotes have to be loaded.
    :return: nested defaultdict ``{metric: {horizon: average win rate}}``,
        e.g. ``{'three_red_soldiers': {1: 0.4977, 3: 0.4338, 5: 0.4075}}``.
        Empty when ``forecast_data`` is empty.
    """
    from collections import defaultdict

    winning = defaultdict(lambda: defaultdict(list))
    if not forecast_data:
        return winning

    if quote_data is None:
        start_date = min(forecast_data)
        end_date = max(forecast_data)
        effect_end_date = self.get_date_offset(
            end_date, bar_count=-max(forecast_days), market=market)
        quote_data = self.get_daily_hists(None,
                                          start_date=start_date,
                                          end_date=effect_end_date,
                                          fields=('close', ),
                                          market=market)

    # Non-inplace so the caller's frame keeps its columns.
    closes = quote_data.set_index(['timestamp', 'symbol']).sort_index()
    closes = closes['close'].unstack()
    logger.debug(f'data: {closes.tail()}')

    future_winning_dict = {}
    for d in forecast_days:
        # Return over the next d bars, aligned on the forecast date.
        pct_change = closes.pct_change(d).shift(-d).dropna(how='all')
        future_winning_dict[d] = pct_change > 0
        # Log per horizon; the former fixed lookup of horizon 1 raised
        # KeyError whenever 1 was not among forecast_days.
        logger.debug(f'd{d}_pct_change: {future_winning_dict[d].tail()}')

    for dt, metrics in forecast_data.items():
        for m in metrics:
            name = m['metrics']
            symbols = m['data']
            direction = m['direction']
            symbols_number = len(symbols)
            if symbols_number == 0:
                # No symbols means there is no rate to compute.
                continue
            for d, fu in future_winning_dict.items():
                try:
                    winning_count = fu.loc[dt, symbols].sum()
                except KeyError:
                    logger.debug(
                        f'winning at {dt} con`t calc for quote is less {max(forecast_days)} after'
                    )
                    continue
                # Convert the count, not the ratio: ``int(count / n)``
                # truncated every rate to 0 or 1.
                winning_pct = int(winning_count) / symbols_number
                # NOTE(review): symbols without a future price compare as
                # "not up", which counts as a win for direction -1.
                if direction == -1:
                    winning_pct = 1 - winning_pct
                winning[name][d].append(winning_pct)

    for per_horizon in winning.values():
        for d, pct in per_horizon.items():
            per_horizon[d] = sum(pct) / len(pct)
    return winning