Esempio n. 1
0
 def update(self, end_date, start_date=None, symbols=None, f=False):
     """Refresh the symbol index, extend the date index and ingest quotes.

     The date range to ingest is chosen as follows, where ``latest`` is the
     value returned by ``self._date_index.update(end_date)``:

     * ``latest`` is None and ``start_date`` is None: nothing is ingested.
     * ``start_date`` is None: ingest from ``latest + 1``.
     * ``start_date`` given, ``latest`` is None: ingest from ``start_date``.
     * ``start_date`` given, ``latest`` known, ``f`` false: ingest from
       ``min(start_date, latest + 1)``.
     * ``start_date`` given, ``latest`` known, ``f`` true: ingest from
       ``latest + 1`` (logged as "in situ").

     :param end_date: last date to ingest; also passed to the date index.
     :param start_date: optional first date to ingest.
     :param symbols: forwarded unchanged to ``ingest_h``.
     :param f: switches between the two ``start_date`` rules above.
     :return: None
     """
     self._symbol_index.update()
     logger.info('updating quote')
     previous_latest = self._date_index.update(end_date)

     # Guard clause: no stored history marker and no explicit start.
     if previous_latest is None and start_date is None:
         logger.info('quote no need to update')
         return

     if start_date is None:
         # previous_latest is necessarily known on this path.
         first_date = previous_latest + 1
         logger.info(f'quote updating append {first_date} - {end_date}')
     else:
         first_date = start_date
         if previous_latest is not None:
             if f:
                 first_date = previous_latest + 1
                 logger.info(
                     f'quote updating in situ {first_date} - {end_date}')
             else:
                 first_date = min(start_date, previous_latest + 1)
                 logger.info(
                     f'quote updating append {first_date} - {end_date}')

     trade_dates = self._date_index.get_calendar(first_date, end_date)
     data = self.ingest_h(symbols=symbols, trade_dates=trade_dates)

     # NOTE(review): load('r+') presumably reopens the backing store for
     # read/write before saving -- confirm against load()'s definition.
     self.load('r+')
     if data.empty:
         return
     self.save(data)
Esempio n. 2
0
    def calc_single_market_with_winning(self, winning_period, market='US'):
        """Run ``calc`` for every calendar day of a window and attach win rates.

        ``self._start_date`` and ``self._end_date`` are re-derived from the
        current ``self._end_date`` via ``get_date_offset``; quotes are loaded
        once for the whole range and sliced per calendar day before calling
        ``calc``. The win rates returned by ``_winning_probability`` are then
        written into each result entry of the final day under ``'winning'``
        and the entries are passed to ``save``.

        :param winning_period: bar count used to derive ``self._start_date``.
        :param market: market code forwarded to every helper call.
        :return: None
        """
        if self._symbols is None:
            self._symbols = self.get_symbols(market)
        # _start_date must be derived before _end_date is reassigned below.
        self._start_date = self.get_date_offset(self._end_date, bar_count=winning_period, market=market)
        self._end_date = self.get_date_offset(self._end_date, bar_count=1, market=market)

        # Load extra history so the first calendar day has a full window.
        data_start_date = self.get_date_offset(self._start_date, bar_count=self._bar_count, market=market)
        logger.info('loading quote')

        data = self.get_daily_hists(self._symbols, data_start_date, self._end_date, market=market)
        calendar = self.get_calendar(start_date=self._start_date, end_date=self._end_date, market=market)

        for dt in calendar:
            period_start_date = self.get_date_offset(dt, self._bar_count, market)
            in_window = (data['timestamp'] <= dt) & (data['timestamp'] >= period_start_date)
            self._calc_date = dt
            self.calc(data[in_window])

        logger.info('calc winning probability')

        # Use the ``market`` argument (not self._market) so the win-rate step
        # always agrees with the market used for every other call above.
        winning_data = self._winning_probability(self._data_store, quote_data=data, market=market)

        data_store = self._data_store[self._end_date]

        for d in data_store:
            d['winning'] = winning_data[d['metrics']]

        self.save(data_store)
Esempio n. 3
0
    def calc(self, data):
        """Clean OHLCV quotes and run every pattern calculation on them.

        ``data`` is pivoted per field into a timestamp x symbol table; any
        symbol with a missing value in a field is dropped from that field, and
        only symbols that survive in all five fields are kept. The resulting
        arrays are handed to ``Classical`` and the ``_c_*`` pattern methods are
        invoked in a fixed order.

        Sets ``self._data_symbols``, ``self._datetime_index`` and
        ``self._calculator_instance``.

        :param data: DataFrame with ``timestamp``, ``symbol``, ``open``,
                     ``high``, ``low``, ``close`` and ``volume`` columns.
                     It is not modified.
        :return: None
        """
        fields = ('open', 'high', 'low', 'close', 'volume')

        logger.info('cleaning invalid data')
        # Non-inplace set_index: the caller's frame (possibly a slice of a
        # larger frame) must not be altered.
        indexed = data.set_index(['timestamp', 'symbol'])
        frames = {
            field: indexed[field].unstack().dropna(axis=1, how='any')
            for field in fields
        }

        common = set(frames['open'].columns)
        for field in fields[1:]:
            common.intersection_update(set(frames[field].columns))
        # Keep the column order of the open table so the symbol order is
        # deterministic instead of depending on set iteration order.
        symbols = [s for s in frames['open'].columns if s in common]

        self._data_symbols = pd.Index(symbols)
        self._datetime_index = pd.to_datetime(frames['open'].index)
        _open, _high, _low, _close, _volume = (
            frames[field].reindex(symbols, axis=1).to_numpy()
            for field in fields
        )

        logger.info(f'calc metrics start, open shape: {_open.shape}')
        self._calculator_instance = Classical(_open, _high, _low, _close, _volume)
        self._c_three_red_soldiers()
        self._c_three_crow()
        self._c_multi_cannon()
        self._c_morning_start()
        self._c_duck_head()
        self._c_rise_wrap_fall()
        self._c_fall_wrap_raise()
        self._c_rise_pregnant()
        self._c_golden_spider()
        self._c_dead_spider()
Esempio n. 4
0
 def calc_single_market(self, market='US'):
     """Run the calculation once for ``self._end_date`` and save the result.

     ``self._end_date`` is first normalised through ``get_valid_date`` and
     also stored as ``self._calc_date``. Quotes are loaded for the window of
     ``self._bar_count`` bars ending on that date, passed to ``calc``, and
     the entries stored under that date in ``self._data_store`` are saved
     as a list.

     :param market: market code forwarded to the helper calls.
     :return: None
     """
     if self._symbols is None:
         self._symbols = self.get_symbols(market)

     valid_end = self.get_valid_date(self._end_date)
     self._end_date = valid_end
     self._calc_date = valid_end

     window_start = self.get_date_offset(
         self._end_date, bar_count=self._bar_count, market=market)

     logger.info('loading quote')
     quotes = self.get_daily_hists(
         self._symbols, window_start, self._end_date, market=market)
     logger.debug(quotes.tail())

     self.calc(quotes)
     results = list(self._data_store[self._end_date])
     self.save(results)
Esempio n. 5
0
    def load(self):
        """Read the contract CSV and rebuild the in-memory contract lookups.

        Reads ``self._index_path`` with every column as string, stores the
        frame in ``self._contract_df`` and creates one ``StockContract`` per
        row. The contracts are collected in ``self._stock_contract_list`` and
        in ``self._symbol_contract_dict`` keyed by symbol. The row's index
        label is used as ``sid``.

        :return: None
        """
        logger.info('loading symbol info')
        self._stock_contract_list = []
        self._symbol_contract_dict = {}
        self._contract_df = pd.read_csv(self._index_path, dtype='str')
        for i, row in self._contract_df.iterrows():
            # Columns are read with [] on purpose: on an iterrows() row,
            # ``row.name`` is the Series' name (the index label), not the
            # value of the 'name' column.
            sc = StockContract(code=row['code'], symbol=row['symbol'], sid=i, name=row['name'],
                               market=self._market, industry=row['industry'], board=row['board'],
                               area=row['area'], list_date=row['list_date'])

            self._stock_contract_list.append(sc)
            self._symbol_contract_dict[row['symbol']] = sc
Esempio n. 6
0
    def ingest(self, symbols, start_date, end_date):
        """Yield daily history for ``symbols`` in chunks of 50 symbols.

        This is a generator: nothing (including the first log line) runs
        until it is iterated. Each yielded item is the return value of
        ``StandardQuoteIngester.ingest_daily_hists_v`` for one chunk.

        :param symbols: sliceable sequence of symbols; when None,
                        ``self._symbol_index.symbols`` is used.
        :param start_date: forwarded to the ingester.
        :param end_date: forwarded to the ingester.
        """
        logger.info('quote ingest start')

        if symbols is None:
            symbols = self._symbol_index.symbols

        fetch_chunk_num = 50
        symbols_len = len(symbols)
        # Ceiling division: ``len // size + 1`` over-counts by one whenever
        # the length is an exact multiple of the chunk size.
        all_chunk_num = -(-symbols_len // fetch_chunk_num)

        chunk_starts = range(0, symbols_len, fetch_chunk_num)
        for chunk_no, i in enumerate(chunk_starts, start=1):
            logger.info(f'ingest quote: {chunk_no}/{all_chunk_num}')
            period_symbols = symbols[i:i + fetch_chunk_num]
            yield StandardQuoteIngester.ingest_daily_hists_v(
                period_symbols, start_date, end_date)
Esempio n. 7
0
    def run(self, end_date=None, symbols=None, market='US', winning_period=60, forecast_days=(1, 3, 5), winning=False):
        """Entry point: run the calculation for one market.

        :param end_date: last date to calculate for; when falsy, yesterday's
                         date as an int in ``YYYYMMDD`` form is used.
        :param symbols: symbols to restrict to; stored as ``self._symbols``
                        (the calc methods load the symbols when it is None).
        :param market: market code. ``'ALL'`` currently re-dispatches to
                       ``'CN'`` only.
        :param winning_period: forwarded to
                               ``calc_single_market_with_winning``.
        :param forecast_days: not used by this method itself; only forwarded
                              on the ``'ALL'`` re-dispatch.
        :param winning: when true, run the rolling calculation with win
                        rates instead of the single-day calculation.
        :return: None
        """
        logger.info(f'running with args end_date: {end_date}, symbols: {symbols}, market: {market}')
        self._end_date = end_date if end_date else int((datetime.today().date() - timedelta(days=1)).strftime('%Y%m%d'))

        if market == 'ALL':
            # Forward every option; previously winning/winning_period were
            # silently reset to their defaults on this path.
            # NOTE(review): only 'CN' is dispatched for 'ALL' -- confirm
            # whether other markets are meant to run here too.
            self.run(end_date, symbols, 'CN', winning_period=winning_period,
                     forecast_days=forecast_days, winning=winning)
        else:
            self._market = market
            self._symbols = symbols
            if not winning:
                self.calc_single_market(market)
            else:
                self.calc_single_market_with_winning(winning_period, market)
Esempio n. 8
0
 def save(self, data):
     """Write quote rows into the per-field storage objects.

     Each row's ``symbol`` and ``trade_date`` are mapped to integer ids via
     ``self._symbol_index.i_of`` and ``self._date_index.i_of``; rows with
     any missing value (including an unmapped id) are dropped. For every
     field in ``self._fields_dict`` the values are pivoted to a date-id x
     symbol-id table, gaps are filled (0 for missing cells, previous row for
     missing date ids), scaled by ``field.precision`` and stored as int32.

     Note: ``data`` is modified in place (``sid``/``did`` columns are added,
     rows are dropped and the index is replaced).

     :param data: DataFrame with ``symbol``, ``trade_date`` and one column
                  per field name.
     :return: None
     """
     logger.info(f'saving data.shape: {data.shape}')
     data['sid'] = data.symbol.apply(self._symbol_index.i_of)
     data['did'] = data.trade_date.apply(self._date_index.i_of)
     logger.debug('[daily_bar_util] saving ohlcv:\n %s' % (data, ))
     data.dropna(how='any', inplace=True)
     if data.empty:
         # Every row was incomplete; min()/max() below would raise on an
         # empty column, so there is simply nothing to write.
         logger.info('no valid rows to save')
         return

     data['did'] = data['did'].astype('int')
     data['sid'] = data['sid'].astype('int')
     start_id = int(data['did'].min())
     end_id = int(data['did'].max())
     # Contiguous range of date ids, so gaps in the input become rows too.
     did_calendar = list(range(start_id, end_id + 1))
     data.set_index(['did', 'sid'], inplace=True)

     for field in self._fields_dict.values():
         single_field_data = data[field.name].unstack()
         single_field_data = single_field_data.sort_index().fillna(0)
         single_field_data = single_field_data.reindex(did_calendar,
                                                       method='pad')
         for sid in single_field_data.columns:
             # 'feild_obj' is the helper's own keyword spelling.
             mmp_obj = self._get_memmap_obj(feild_obj=field, sid=sid)
             # Column within the storage object is the sid modulo the chunk
             # width; rows are addressed by date id.
             mmp_obj[start_id:end_id + 1, sid % self._chunks_s_num] = \
                 (single_field_data[sid] * field.precision).to_numpy(dtype='int32')
Esempio n. 9
0
 def update(self):
     """Append newly listed contracts to the stored contract table.

     Reloads the stored table, ingests the current listing, and keeps only
     rows whose ``symbol`` is not stored yet. When there are any, they are
     appended to ``self._contract_df``, saved, and the table is reloaded;
     otherwise only a log line is written.

     :return: None
     """
     # Function-scope import so this method does not depend on a
     # module-level ``pd`` alias being present.
     import pandas as pd

     self.load()
     logger.info('updating contract')
     new = self.ingest()
     old = self._contract_df['symbol']
     # Membership filter instead of set_index().drop(old): drop() raises
     # KeyError as soon as one stored symbol is absent from the new listing.
     to_update_df = new[~new['symbol'].isin(old)].reset_index(drop=True)
     if not to_update_df.empty:
         logger.info(f'update contracts {to_update_df["symbol"].to_list()}')
         # DataFrame.append was removed in pandas 2.0; concat is equivalent.
         self._contract_df = pd.concat([self._contract_df, to_update_df])
         self.save(self._contract_df)
         self.load()
     else:
         logger.info('no new contracts to update')
Esempio n. 10
0
 def ingest_h(self, symbols, trade_dates):
     """Delegate to ``StandardQuoteIngester.ingest_daily_hists_h``.

     :param symbols: forwarded unchanged.
     :param trade_dates: forwarded unchanged.
     :return: whatever the ingester returns for ``self._market``.
     """
     logger.info('quote ingest start')
     fetch = StandardQuoteIngester.ingest_daily_hists_h
     return fetch(symbols, trade_dates, self._market)
Esempio n. 11
0
 def save(self, data: pd.DataFrame):
     """Write ``data`` as CSV to ``self._index_path`` without the index.

     :param data: contract table to persist.
     :return: None
     """
     logger.info('saving symbol info')
     target = self._index_path
     data.to_csv(target, mode='w+', index=False)
Esempio n. 12
0
 def init(self):
     """Ingest the contract listing, save it, then load it into memory.

     :return: None
     """
     logger.info('init symbol info')
     self.save(self.ingest())
     self.load()
Esempio n. 13
0
 def ingest(self):
     """Delegate to ``StandardQuoteIngester.ingest_basic`` for this market.

     :return: whatever the ingester returns for ``self._market``.
     """
     logger.info('ingest symbol info')
     fetch_basic = StandardQuoteIngester.ingest_basic
     return fetch_basic(self._market)
Esempio n. 14
0
    def _winning_probability(self,
                             forecast_data,
                             forecast_days=(1, 3, 5),
                             quote_data=None,
                             market='US'):
        """
        Compute the average win rate of every metric per forecast horizon.

        For each date and metric in ``forecast_data`` the share of the
        metric's symbols whose close is higher ``d`` bars later is computed
        (inverted as ``1 - share`` when ``direction`` is -1), and those
        per-date shares are averaged per metric and horizon.

        :param forecast_data: dict[list]
                            { datetime.datetime(2019, 9, 27, 0, 0):
                                  [{'metrics': 'three_red_soldiers',
                                    'date': datetime.datetime(2019, 9, 27, 0, 0),
                                    'market': 'US',
                                    'direction': 1,       # predicted future direction
                                    'data': ['HGH', 'LEO', 'EE', 'BBDC', 'NCA', 'USAC', 'MHI']},
                                    ....
                                   ],
                             .....
                            }
        :param forecast_days:
                            future horizons (in bars) to check, e.g. (1, 3, 5)
        :param quote_data:
                            DataFrame with ``timestamp``, ``symbol`` and
                            ``close`` columns; loaded via ``get_daily_hists``
                            when None. It is not modified.
        :param market:
                            market code, used only when quotes are loaded here
        :return: dict (a defaultdict, so an unknown metric yields an empty
                 mapping instead of raising)
                 {'three_red_soldiers':  {1: 0.49767227902371464,
                                          3: 0.43380883095574463,
                                          5: 0.4075340831245499},
                   'three_crow':  {1: 0.563627227684226,
                                   3: 0.5850837879834898,
                                   5: 0.5889460044487472},
                  ......
                 }
        """
        from collections import defaultdict

        winning = defaultdict(lambda: defaultdict(list))
        if not forecast_data:
            # Nothing to evaluate; min()/max() below would raise on no keys.
            return winning

        if quote_data is None:
            start_date = min(forecast_data.keys())
            end_date = max(forecast_data.keys())
            # Negative bar_count: extend past the last forecast date so the
            # longest horizon can still be evaluated.
            effect_end_date = self.get_date_offset(
                end_date, bar_count=-max(forecast_days), market=market)
            quote_data = self.get_daily_hists(None,
                                              start_date=start_date,
                                              end_date=effect_end_date,
                                              fields=('close', ),
                                              market=market)

        # Non-inplace operations: the caller's frame must stay untouched.
        indexed = quote_data.set_index(['timestamp', 'symbol']).sort_index()
        close = indexed['close'].unstack()
        logger.debug(f'data: {close.tail()}')

        # For each horizon d: True where close(t + d) > close(t).
        future_winning_dict = {}
        for d in forecast_days:
            pct_change = close.pct_change(d).shift(-d)
            pct_change = pct_change.dropna(how='all')
            future_winning_dict[d] = pct_change > 0

        # Horizon 1 is only logged when it was actually requested.
        if 1 in future_winning_dict:
            logger.debug(f'd1_pct_change: {future_winning_dict[1].tail()}')

        for dt, metrics in forecast_data.items():
            for m in metrics:
                name = m['metrics']
                symbols = m['data']
                direction = m['direction']
                symbols_number = len(symbols)
                if symbols_number == 0:
                    # No symbols matched this metric: no rate to compute.
                    continue
                for d, fu in future_winning_dict.items():
                    try:
                        winning_count = fu.loc[dt, symbols].sum()
                    except KeyError:
                        logger.debug(
                            f'winning at {dt} con`t calc for quote is less {max(forecast_days)} after'
                        )
                        continue
                    # float, not int: int() truncated every rate to 0 or 1.
                    winning_pct = float(winning_count / symbols_number)
                    if direction == -1:
                        winning_pct = 1 - winning_pct
                    winning[name][d].append(winning_pct)

        # Collapse each list of per-date rates into its mean.
        for by_horizon in winning.values():
            for d in list(by_horizon):
                rates = by_horizon[d]
                by_horizon[d] = sum(rates) / len(rates)
        return winning