def load_group_df(
    self,
    instruments,
    exprs: list,
    names: list,
    start_time: Union[str, pd.Timestamp] = None,
    end_time: Union[str, pd.Timestamp] = None,
    gp_name: str = None,
) -> pd.DataFrame:
    """Fetch one expression group from the data provider and label its columns.

    Returns a DataFrame indexed by <instrument, datetime>, or by
    <datetime, instrument> when ``self.swap_level`` is set.
    """
    if instruments is None:
        warnings.warn("`instruments` is not set, will load all stocks")
        instruments = "all"
    if isinstance(instruments, str):
        instruments = D.instruments(instruments, filter_pipe=self.filter_pipe)
    elif self.filter_pipe is not None:
        warnings.warn("`filter_pipe` is not None, but it will not be used with `instruments` as list")

    # A per-group frequency may be configured as a dict keyed by group name.
    if isinstance(self.freq, dict):
        group_freq = self.freq[gp_name]
    else:
        group_freq = self.freq

    group_df = D.features(
        instruments,
        exprs,
        start_time,
        end_time,
        freq=group_freq,
        inst_processors=self.inst_processor.get(gp_name, []),
    )
    group_df.columns = names
    if self.swap_level:
        # NOTE: if swaplevel, return <datetime, instrument>
        group_df = group_df.swaplevel().sort_index()
    return group_df
def load_data(self):
    """Smoke-check the data provider: print the first two trading days, then
    pull a handful of basic expression features for the CSI300 universe."""
    first_days = D.calendar(start_time='2010-01-01', end_time='2017-12-31', freq='day')[:2]
    print(first_days)
    # An explicit list such as ['SH600570', 'SH600000'] could be passed instead.
    instruments = D.instruments('csi300')
    fields = ['$close', '$volume', 'Ref($close, 1)', 'Mean($close, 3)', '$high-$low']
    data = D.features(instruments, fields, start_time='2010-01-01', end_time='2017-12-31', freq='day')
def test_2_dump_instruments(self):
    """The dumped instrument universe must match the source CSV file names exactly."""
    # Symbols implied by the raw CSV files (drop the ".csv" suffix, upper-case).
    ori_ins = set(map(lambda x: x.name[:-4].upper(), SOURCE_DIR.glob("*.csv")))
    res_ins = set(D.list_instruments(D.instruments("all"), as_list=True))
    # BUG FIX: the original asserted `len(ori_ins - res_ins) == len(ori_ins - res_ins) == 0`,
    # comparing the same difference to itself, so extra dumped instruments
    # (res_ins - ori_ins) were never detected. Check both directions.
    assert len(ori_ins - res_ins) == len(res_ins - ori_ins) == 0, "dump instruments failed"
def test_0_qlib_data(self):
    """Download the CN data bundle and verify the requested fields load non-empty."""
    GetData().qlib_data_cn(QLIB_DIR)
    feature_df = D.features(D.instruments("csi300"), self.FIELDS)
    self.assertListEqual(list(feature_df.columns), self.FIELDS, "get qlib data failed")
    self.assertFalse(feature_df.dropna().empty, "get qlib data failed")
def test_handler_storage(self):
    """Exercise random `fetch` calls on a plain DataFrame-backed handler versus one
    configured with hash-bucketed ("HashStockFormat") storage.

    NOTE(review): this is a timing/smoke test — it asserts nothing; it only checks
    that the fetches run and logs their durations via TimeInspector.
    """
    # init data handler (default DataFrame storage)
    data_handler = TestHandler(**self.data_handler_kwargs)
    # init data handler with hashing storage
    data_handler_hs = TestHandler(**self.data_handler_kwargs, infer_processors=["HashStockFormat"])
    fetch_start_time = "2019-01-01"
    fetch_end_time = "2019-12-31"
    # Resolve the market into a concrete symbol list so we can sample from it.
    instruments = D.instruments(market=self.market)
    instruments = D.list_instruments(
        instruments=instruments, start_time=fetch_start_time, end_time=fetch_end_time, as_list=True
    )
    with TimeInspector.logt("random fetch with DataFrame Storage"):
        # single stock
        for i in range(100):
            random_index = np.random.randint(len(instruments), size=1)[0]
            fetch_stock = instruments[random_index]
            data_handler.fetch(selector=(fetch_stock, slice(fetch_start_time, fetch_end_time)), level=None)
        # multi stocks
        for i in range(100):
            random_indexs = np.random.randint(len(instruments), size=5)
            fetch_stocks = [instruments[_index] for _index in random_indexs]
            data_handler.fetch(selector=(fetch_stocks, slice(fetch_start_time, fetch_end_time)), level=None)
    with TimeInspector.logt("random fetch with HasingStock Storage"):
        # single stock
        for i in range(100):
            random_index = np.random.randint(len(instruments), size=1)[0]
            fetch_stock = instruments[random_index]
            data_handler_hs.fetch(selector=(fetch_stock, slice(fetch_start_time, fetch_end_time)), level=None)
        # multi stocks
        for i in range(100):
            random_indexs = np.random.randint(len(instruments), size=5)
            fetch_stocks = [instruments[_index] for _index in random_indexs]
            data_handler_hs.fetch(selector=(fetch_stocks, slice(fetch_start_time, fetch_end_time)), level=None)
def testClose(self):
    """One-day close-to-close changes across CSI300 should stay within ±10% at
    the 10th/90th percentiles."""
    daily_change = D.features(D.instruments("csi300"), ["Ref($close, 1)/$close - 1"])
    stats = daily_change.describe(percentiles=np.arange(0.1, 1.0, 0.1))
    print(stats)
    self.assertLessEqual(abs(stats.loc["90%"][0]), 0.1, "Close value is abnormal")
    self.assertLessEqual(abs(stats.loc["10%"][0]), 0.1, "Close value is abnormal")
def _get_old_data(self, qlib_data_dir: "str | Path"):
    """Load close-price calibration columns from an existing qlib data directory.

    Parameters
    ----------
    qlib_data_dir : str | Path
        Path of the qlib bin-data directory to read from.

    Returns
    -------
    pd.DataFrame
        Two columns, renamed to ``self._ori_close_field`` ($close/$factor) and
        ``self._first_close_field`` ($adjclose/$close).
    """
    # FIX: the annotation was `[str, Path]` — a plain list literal, which is not a
    # valid type for checkers; use a lazily evaluated (string) union instead so no
    # extra typing import is required.
    import qlib
    from qlib.data import D

    qlib_data_dir = str(Path(qlib_data_dir).expanduser().resolve())
    # expression/dataset caches are explicitly set to None for this init.
    qlib.init(provider_uri=qlib_data_dir, expression_cache=None, dataset_cache=None)
    df = D.features(D.instruments("all"), ["$close/$factor", "$adjclose/$close"])
    df.columns = [self._ori_close_field, self._first_close_field]
    return df
def _get_all_1d_data(self):
    """Read daily paused/volume/factor/close data for every instrument and return
    it as a flat frame with renamed, "$"-less columns."""
    import qlib
    from qlib.data import D

    qlib.init(provider_uri=self.qlib_data_1d_dir)
    daily = D.features(D.instruments("all"), ["$paused", "$volume", "$factor", "$close"], freq="day")
    daily.reset_index(inplace=True)
    daily.rename(columns={"datetime": self._date_field_name, "instrument": self._symbol_field_name}, inplace=True)
    # Drop the "$" prefix qlib puts on feature columns.
    daily.columns = [col[1:] if col.startswith("$") else col for col in daily.columns]
    return daily
def fill_1min_using_1d(
    data_1min_dir: [str, Path],
    qlib_data_1d_dir: [str, Path],
    max_workers: int = 16,
    date_field_name: str = "date",
    symbol_field_name: str = "symbol",
):
    """Use 1d data to fill in the missing symbols relative to 1min

    Parameters
    ----------
    data_1min_dir: str
        1min data dir
    qlib_data_1d_dir: str
        1d qlib data(bin data) dir, from: https://qlib.readthedocs.io/en/latest/component/data.html#converting-csv-format-into-qlib-format
    max_workers: int
        ThreadPoolExecutor(max_workers), by default 16
    date_field_name: str
        date field name, by default date
    symbol_field_name: str
        symbol field name, by default symbol
    """
    # NOTE(review): the `[str, Path]` annotations are list literals, not valid
    # typing unions — presumably `Union[str, Path]` was intended; left unchanged here.
    data_1min_dir = Path(data_1min_dir).expanduser().resolve()
    qlib_data_1d_dir = Path(qlib_data_1d_dir).expanduser().resolve()
    # Date span covered by the existing 1min CSVs; used to bound the 1d query.
    min_date, max_date = get_date_range(data_1min_dir, max_workers, date_field_name)
    symbols_1min = get_symbols(data_1min_dir)
    qlib.init(provider_uri=str(qlib_data_1d_dir))
    data_1d = D.features(D.instruments("all"), ["$close"], min_date, max_date, freq="day")
    # Symbols present in the 1d universe but absent from the 1min directory.
    miss_symbols = set(data_1d.index.get_level_values(level="instrument").unique()) - set(symbols_1min)
    if not miss_symbols:
        logger.warning("More symbols in 1min than 1d, no padding required")
        return
    logger.info(f"miss_symbols {len(miss_symbols)}: {miss_symbols}")
    # Borrow the column layout (and the symbol-casing convention) from an
    # arbitrary existing 1min CSV.
    tmp_df = pd.read_csv(list(data_1min_dir.glob("*.csv"))[0])
    columns = tmp_df.columns
    _si = tmp_df[symbol_field_name].first_valid_index()
    is_lower = tmp_df.loc[_si][symbol_field_name].islower()
    for symbol in tqdm(miss_symbols):
        if is_lower:
            symbol = symbol.lower()
        # 1d trading days for this symbol, expanded to a minute-level calendar.
        index_1d = data_1d.loc(axis=0)[symbol.upper()].index
        index_1min = generate_minutes_calendar_from_daily(index_1d)
        index_1min.name = date_field_name
        # Write an all-NaN placeholder CSV so the 1min universe matches 1d.
        _df = pd.DataFrame(columns=columns, index=index_1min)
        if date_field_name in _df.columns:
            # avoid a duplicate date column once the index is reset below
            del _df[date_field_name]
        _df.reset_index(inplace=True)
        _df[symbol_field_name] = symbol
        _df["paused_num"] = 0
        _df.to_csv(data_1min_dir.joinpath(f"{symbol}.csv"), index=False)
def test_0_qlib_data(self):
    """Download the simplified CN daily bundle and verify the fields load non-empty."""
    GetData().qlib_data(
        name="qlib_data_simple", target_dir=QLIB_DIR, region="cn", interval="1d", version="latest"
    )
    feature_df = D.features(D.instruments("csi300"), self.FIELDS)
    self.assertListEqual(list(feature_df.columns), self.FIELDS, "get qlib data failed")
    self.assertFalse(feature_df.dropna().empty, "get qlib data failed")
def testClose(self):
    """One-day close changes across CSI300: 80th percentile within ±10%, extremes
    within ±20%."""
    daily_change = D.features(D.instruments('csi300'), ['Ref($close, 1)/$close - 1'])
    stats = daily_change.describe(percentiles=np.arange(0.1, 0.9, 0.1))
    print(stats)
    self.assertLessEqual(abs(stats.loc["80%"][0]), 0.1, "Close value is abnormal")
    self.assertLessEqual(abs(stats.loc["max"][0]), 0.2, "Close value is abnormal")
    self.assertGreaterEqual(stats.loc["min"][0], -0.2, "Close value is abnormal")
def test_setting(self):
    """All of the ChangeInstrument queries below are expected to parse and run."""
    # index close for "SH600519", taken from SH000300
    result = D.features(["SH600519"], ["ChangeInstrument('SH000300', $close)"])
    # index return, functional Feature()/Ref() spelling
    result = D.features(["SH600519"], ["ChangeInstrument('SH000300', Feature('close')/Ref(Feature('close'),1) -1)"])
    # index return, $-field spelling
    result = D.features(["SH600519"], ["ChangeInstrument('SH000300', $close/Ref($close,1) -1)"])
    # excess return
    result = D.features(
        ["SH600519"], ["($close/Ref($close,1) -1) - ChangeInstrument('SH000300', $close/Ref($close,1) -1)"]
    )
    print(result)
def load_dataset(market='csi300'):
    """Build an Alpha360-style dataset: 60-day windows of open/high/low/close/vwap/
    volume ratios as features, plus a 2-day-ahead vwap-return label.

    Returns
    -------
    (df, names, label_names) : the combined feature+label frame, the feature
        column names, and the label column names.
    """
    # features
    fields = []
    names = []
    fields += ['$open/$close']  # NOTE: Ref($open, 0) != $open
    fields += ['Ref($open, %d)/$close' % d for d in range(1, 60)]
    names += ['OPEN%d' % d for d in range(60)]
    fields += ['$high/$close']
    fields += ['Ref($high, %d)/$close' % d for d in range(1, 60)]
    names += ['HIGH%d' % d for d in range(60)]
    fields += ['$low/$close']
    fields += ['Ref($low, %d)/$close' % d for d in range(1, 60)]
    names += ['LOW%d' % d for d in range(60)]
    fields += ['$close/$close']  # 1
    fields += ['Ref($close, %d)/$close' % d for d in range(1, 60)]
    names += ['CLOSE%d' % d for d in range(60)]
    fields += ['$vwap/$close']
    fields += ['Ref($vwap, %d)/$close' % d for d in range(1, 60)]
    names += ['VWAP%d' % d for d in range(60)]
    # Log-volume variant kept for reference:
    # fields += ['Log($volume/$volume)']  # 1
    # fields += ['Log(Ref($volume, %d)/$volume)' % d for d in range(1, 60)]
    # names += ['VOLUME%d'%d for d in range(60)]
    fields += ['$volume/$volume']  # 1
    fields += ['Ref($volume, %d)/$volume' % d for d in range(1, 60)]
    names += ['VOLUME%d' % d for d in range(60)]
    # labels: vwap return realized two days ahead
    labels = ['Ref($vwap, -2)/Ref($vwap, -1)-1']
    label_names = ['LABEL0']
    ## load features
    print('loading features...')
    df = D.features(D.instruments(market), fields, start_time='2007-01-01')
    df.columns = names
    print('load features over')
    ## load labels
    # NOTE(review): features are restricted to `market` but labels are loaded for
    # 'all' instruments — confirm this asymmetry is intended (the column-assign
    # below aligns on the feature index either way).
    if len(labels):
        print('loading labels...')
        df_labels = D.features(D.instruments('all'), labels, start_time='2007-01-01')
        df_labels.columns = label_names
        df[label_names] = df_labels
        print('load labels over')
    return df, names, label_names
def load_group_df(self, instruments, exprs: list, names: list, start_time=None, end_time=None) -> pd.DataFrame:
    """Load the requested expressions for a universe and rename the columns.

    The result always carries a <datetime, instrument> MultiIndex.
    """
    if instruments is None:
        warnings.warn("`instruments` is not set, will load all stocks")
        instruments = "all"
    if isinstance(instruments, str):
        instruments = D.instruments(instruments, filter_pipe=self.filter_pipe)
    elif self.filter_pipe is not None:
        warnings.warn("`filter_pipe` is not None, but it will not be used with `instruments` as list")
    group_df = D.features(instruments, exprs, start_time, end_time)
    group_df.columns = names
    # NOTE: always return <datetime, instrument>
    return group_df.swaplevel().sort_index()
def testCSI300(self):
    """The per-day CSI300 constituent count must stay in a plausible band."""
    close_df = D.features(D.instruments("csi300"), ["$close"])
    daily_size = close_df.groupby("datetime").size()
    daily_cnt = close_df.groupby("datetime").count()["$close"]
    size_desc = daily_size.describe(percentiles=np.arange(0.1, 1.0, 0.1))
    cnt_desc = daily_cnt.describe(percentiles=np.arange(0.1, 1.0, 0.1))
    print(size_desc)
    print(cnt_desc)
    self.assertLessEqual(size_desc.loc["max"], 305, "Excessive number of CSI300 constituent stocks")
    self.assertGreaterEqual(size_desc.loc["80%"], 290, "Insufficient number of CSI300 constituent stocks")
    self.assertLessEqual(cnt_desc.loc["max"], 305, "Excessive number of CSI300 constituent stocks")
def get_date_by_shift(trading_date, shift, future=False, clip_shift=True):
    """Get the trading date shifted by `shift` trading days from `trading_date`.

    e.g. shift == 1 returns the next trading date; shift == -1 the previous one.

    Parameters
    ----------
    trading_date : pandas.Timestamp or str
        current date; must be a trading day
    shift : int
        number of trading days to move (may be negative)
    clip_shift : bool
        clip to the calendar boundaries instead of raising IndexError

    Raises
    ------
    ValueError
        if `trading_date` is not a trading day
    IndexError
        if the shifted index is out of range and clip_shift is False
    """
    from qlib.data import D

    cal = D.calendar(future=future)
    # BUG FIX: normalize once up front. Previously only the membership test
    # converted to Timestamp while bisect searched with the raw argument
    # (possibly a str), which can yield a wrong position against Timestamp
    # calendar entries. This also matches the newer align-aware variant.
    trading_date = pd.to_datetime(trading_date)
    if trading_date not in list(cal):
        raise ValueError("{} is not trading day!".format(str(trading_date)))
    _index = bisect.bisect_left(cal, trading_date)
    shift_index = _index + shift
    if shift_index < 0 or shift_index >= len(cal):
        if clip_shift:
            shift_index = np.clip(shift_index, 0, len(cal) - 1)
        else:
            raise IndexError(
                f"The shift_index({shift_index}) of the trading day ({trading_date}) is out of range"
            )
    return cal[shift_index]
def test_expr(self):
    """Nested expressions inside the point-in-time P() operator evaluate against
    the report data visible at each trading date.

    NOTE(review): the expected table below was reconstructed from a
    whitespace-mangled source; row content is preserved, exact column alignment
    may differ from the original literal.
    """
    fields = [
        "P(Mean($$roewa_q, 1))",
        "P($$roewa_q)",
        "P(Mean($$roewa_q, 2))",
        "P(Ref($$roewa_q, 1))",
        "P((Ref($$roewa_q, 1) +$$roewa_q) / 2)",
    ]
    instruments = ["sh600519"]
    data = D.features(instruments, fields, start_time="2019-01-01", end_time="2019-07-19", freq="day")
    # Values step on 2019-07-15 and 2019-07-18 as new report data becomes visible.
    expect = """
       P(Mean($$roewa_q, 1)) P($$roewa_q) P(Mean($$roewa_q, 2)) P(Ref($$roewa_q, 1)) P((Ref($$roewa_q, 1) +$$roewa_q) / 2)
instrument datetime
sh600519 2019-07-01 0.094737 0.094737 0.219691 0.344644 0.219691
         2019-07-02 0.094737 0.094737 0.219691 0.344644 0.219691
         2019-07-03 0.094737 0.094737 0.219691 0.344644 0.219691
         2019-07-04 0.094737 0.094737 0.219691 0.344644 0.219691
         2019-07-05 0.094737 0.094737 0.219691 0.344644 0.219691
         2019-07-08 0.094737 0.094737 0.219691 0.344644 0.219691
         2019-07-09 0.094737 0.094737 0.219691 0.344644 0.219691
         2019-07-10 0.094737 0.094737 0.219691 0.344644 0.219691
         2019-07-11 0.094737 0.094737 0.219691 0.344644 0.219691
         2019-07-12 0.094737 0.094737 0.219691 0.344644 0.219691
         2019-07-15 0.000000 0.000000 0.047369 0.094737 0.047369
         2019-07-16 0.000000 0.000000 0.047369 0.094737 0.047369
         2019-07-17 0.000000 0.000000 0.047369 0.094737 0.047369
         2019-07-18 0.175322 0.175322 0.135029 0.094737 0.135029
         2019-07-19 0.175322 0.175322 0.135029 0.094737 0.135029
"""
    self.check_same(data.tail(15), expect)
def test_query(self):
    """Daily point-in-time queries for sh600519 over 2019H1.

    NOTE(review): the expected tables below were reconstructed from a
    whitespace-mangled source; row content is preserved, exact column alignment
    may differ from the original literal.
    """
    instruments = ["sh600519"]
    fields = ["P($$roewa_q)", "P($$yoyni_q)"]
    # Mao Tai published 2019Q2 report at 2019-07-13 & 2019-07-18
    # - http://www.cninfo.com.cn/new/commonUrl/pageOfSearch?url=disclosure/list/search&lastPage=index
    data = D.features(instruments, fields, start_time="2019-01-01", end_time="2019-07-19", freq="day")
    res = """
       P($$roewa_q) P($$yoyni_q)
count 133.000000 133.000000
mean 0.196412 0.277930
std 0.097591 0.030262
min 0.000000 0.243892
25% 0.094737 0.243892
50% 0.255220 0.304181
75% 0.255220 0.305041
max 0.344644 0.305041
"""
    self.check_same(data.describe(), res)
    res = """
       P($$roewa_q) P($$yoyni_q)
instrument datetime
sh600519 2019-07-15 0.000000 0.305041
         2019-07-16 0.000000 0.305041
         2019-07-17 0.000000 0.305041
         2019-07-18 0.175322 0.252650
         2019-07-19 0.175322 0.252650
"""
    self.check_same(data.tail(), res)
def test_pref_operator(self):
    """PRef pins an expression to a fixed report period (e.g. 201902), unlike P()
    which tracks the period visible at each date.

    NOTE(review): the expected table below was reconstructed from a
    whitespace-mangled source; row content is preserved, exact column alignment
    may differ from the original literal.
    """
    instruments = ["sh600519"]
    fields = [
        "PRef($$roewa_q, 201902)",
        "PRef($$yoyni_q, 201801)",
        "P($$roewa_q)",
        "P($$roewa_q) / PRef($$roewa_q, 201801)",
    ]
    data = D.features(instruments, fields, start_time="2018-04-28", end_time="2019-07-19", freq="day")
    # 201902 values are NaN until that report is published in July 2019.
    except_data = """
       PRef($$roewa_q, 201902) PRef($$yoyni_q, 201801) P($$roewa_q) P($$roewa_q) / PRef($$roewa_q, 201801)
instrument datetime
sh600519 2018-05-02 NaN 0.395075 0.088887 1.000000
         2018-05-03 NaN 0.395075 0.088887 1.000000
         2018-05-04 NaN 0.395075 0.088887 1.000000
         2018-05-07 NaN 0.395075 0.088887 1.000000
         2018-05-08 NaN 0.395075 0.088887 1.000000
... ... ... ... ...
         2019-07-15 0.000000 0.395075 0.000000 0.000000
         2019-07-16 0.000000 0.395075 0.000000 0.000000
         2019-07-17 0.000000 0.395075 0.000000 0.000000
         2019-07-18 0.175322 0.395075 0.175322 1.972414
         2019-07-19 0.175322 0.395075 0.175322 1.972414

[299 rows x 4 columns]
"""
    self.check_same(data, except_data)
def test_expr2(self):
    """PIT expressions can be composed (ratios, Sum) and mixed with ordinary
    price fields like $close.

    NOTE(review): the expected table below was reconstructed from a
    whitespace-mangled source; row content is preserved, exact column alignment
    may differ from the original literal.
    """
    instruments = ["sh600519"]
    fields = ["P($$roewa_q)", "P($$yoyni_q)"]
    fields += ["P(($$roewa_q / $$yoyni_q) / Ref($$roewa_q / $$yoyni_q, 1) - 1)"]
    fields += ["P(Sum($$yoyni_q, 4))"]
    fields += ["$close", "P($$roewa_q) * $close"]
    data = D.features(instruments, fields, start_time="2019-01-01", end_time="2020-01-01", freq="day")
    except_data = """
       P($$roewa_q) P($$yoyni_q) P(($$roewa_q / $$yoyni_q) / Ref($$roewa_q / $$yoyni_q, 1) - 1) P(Sum($$yoyni_q, 4)) $close P($$roewa_q) * $close
instrument datetime
sh600519 2019-01-02 0.255220 0.243892 1.484224 1.661578 63.595333 16.230801
         2019-01-03 0.255220 0.243892 1.484224 1.661578 62.641907 15.987467
         2019-01-04 0.255220 0.243892 1.484224 1.661578 63.915985 16.312637
         2019-01-07 0.255220 0.243892 1.484224 1.661578 64.286530 16.407207
         2019-01-08 0.255220 0.243892 1.484224 1.661578 64.212196 16.388237
... ... ... ... ... ... ...
         2019-12-25 0.255819 0.219821 0.677052 1.081693 122.150467 31.248409
         2019-12-26 0.255819 0.219821 0.677052 1.081693 122.301315 31.286999
         2019-12-27 0.255819 0.219821 0.677052 1.081693 125.307404 32.056015
         2019-12-30 0.255819 0.219821 0.677052 1.081693 127.763992 32.684456
         2019-12-31 0.255819 0.219821 0.677052 1.081693 127.462303 32.607277

[244 rows x 6 columns]
"""
    self.check_same(data, except_data)
def test_2_dump_features(self):
    """Dump the configured fields and verify the provider returns them intact."""
    self.DUMP_DATA.dump_features(include_fields=self.FIELDS)
    feature_df = D.features(self.STOCK_NAMES, self.QLIB_FIELDS)
    # Cache the first stock's slice for later tests in this class.
    TestDumpData.SIMPLE_DATA = feature_df.loc(axis=0)[self.STOCK_NAMES[0], :]
    self.assertFalse(feature_df.dropna().empty, "features data failed")
    self.assertListEqual(list(feature_df.columns), self.QLIB_FIELDS, "features columns failed")
def test_case(instruments, queries, note=None):
    """Run the expression `queries` against `instruments`, print and return the result."""
    if note:
        print(note)
    print(f"checking {instruments} with queries {queries}")
    result = D.features(instruments, queries)
    print(result)
    return result
def clear_task(body):
    """Callback function when initialize rabbitmq."""
    payload = pickle.loads(body)
    task_type = payload["meta"]["type"]
    uri = D._uri(task_type, **(payload["args"]))
    # delete task
    pop_ssids_from_redis(uri)
def get_date_by_shift(trading_date, shift, future=False, clip_shift=True, freq="day", align: Optional[str] = None):
    """Get the trading date shifted by `shift` trading days from `trading_date`.

    e.g. shift == 1 returns the next trading date; shift == -1 the previous one.

    Parameters
    ----------
    trading_date : pandas.Timestamp
        current date
    shift : int
    clip_shift : bool
    align : Optional[str]
        When align is None, this function will raise ValueError if `trading_date`
        is not a trading date.
        When align is "left"/"right", it will try to align to the left/right
        nearest trading date before shifting when `trading_date` is not a
        trading date.
    """
    from qlib.data import D  # pylint: disable=C0415

    cal = D.calendar(future=future, freq=freq)
    trading_date = pd.to_datetime(trading_date)
    if align is None:
        if trading_date not in list(cal):
            raise ValueError("{} is not trading day!".format(str(trading_date)))
        base_index = bisect.bisect_left(cal, trading_date)
    elif align == "left":
        base_index = bisect.bisect_right(cal, trading_date) - 1
    elif align == "right":
        base_index = bisect.bisect_left(cal, trading_date)
    else:
        raise ValueError(f"align with value `{align}` is not supported")

    shift_index = base_index + shift
    if not 0 <= shift_index < len(cal):
        if not clip_shift:
            raise IndexError(f"The shift_index({shift_index}) of the trading day ({trading_date}) is out of range")
        shift_index = np.clip(shift_index, 0, len(cal) - 1)
    return cal[shift_index]
def task_callback(self, ch, method, properties, body):
    """Callback function when a published task is received.

    When a published task is received from rabbitmq, a new process will be
    established to attend to the task.
    `self.channel.basic_qos(prefetch_count=1)` is used to control the maximum
    concurrency of data processing process.
    """
    self.logger.debug("Receive task from queue at %f" % time.time())
    # NOTE(review): pickle from the message queue — this trusts the publisher.
    tbody = pickle.loads(body)
    ttype = tbody["meta"]["type"]
    ssid = tbody["meta"]["ssid"]
    self.logger.info("receive %s task : '%.200s'" % (ttype, tbody))
    # Deduplication key: identical requests map to the same task URI.
    task_uri = D._uri(ttype, **(tbody["args"]))
    self.logger.debug("check task at %f" % time.time())
    qlen = add_to_task_l_and_check_qlen(task_uri, ssid)
    if qlen == 1:
        # first to create the task queue
        # no task is running
        # here the data processes will not use the historical memory cache as before
        # actually the memory cache is used to accelerate the inside of a
        # process
        self.logger.debug("start processing data at %f" % time.time())
        # In order to no longer clear the MemoryCache, a process has been created here.
        # Dispatch to e.g. self.instrument_callback for ttype == "instrument".
        p = multiprocessing.Process(target=getattr(self, "%s_callback" % ttype), args=(tbody["args"], task_uri))
        p.start()
        p.join()
    else:
        # An identical task is already queued/running; its result will be fanned
        # out to every registered ssid.
        self.logger.debug(f"There has already been the same task. Just append the ssid {ssid}.")
    # Ack only after handling so the broker can redeliver on failure before this point.
    ch.basic_ack(delivery_tag=method.delivery_tag)
def __init__(self, record: Recorder, to_date=None, hist_ref: int = 0, freq="day"):
    """
    Init PredUpdater.

    Args:
        record : Recorder
        to_date : update the prediction to the `to_date`; when None, defaults to
            the last date of the trading calendar for `freq`
        hist_ref : int
            Sometimes, the dataset will have historical depends.
            Leave the problem to users to set the length of historical dependency

            .. note:: the start_time is not included in the hist_ref
        freq : calendar frequency used to resolve the default `to_date`
    """
    # TODO: automate this hist_ref in the future.
    super().__init__(record=record)
    self.hist_ref = hist_ref
    self.freq = freq
    self.rmdl = RMDLoader(rec=record)
    # FIX: compare with `is None` (identity), not `== None`; also dropped the
    # redundant `self.to_date = to_date` that was unconditionally overwritten below.
    if to_date is None:
        to_date = D.calendar(freq=freq)[-1]
    self.to_date = pd.Timestamp(to_date)
    self.old_pred = record.load_object("pred.pkl")
    self.last_end = self.old_pred.index.get_level_values("datetime").max()
def _compare(self, file_path: Path):
    """Compare one original CSV against the data dumped into qlib.

    Returns one of the class result codes: NOT_IN_FEATURES, COMPARE_TRUE,
    COMPARE_FALSE, or COMPARE_ERROR.
    """
    # BUG FIX: `str.strip(suffix)` removes any of the suffix's *characters* from
    # both ends (e.g. "vcs1.csv".strip(".csv") -> "1"), corrupting symbols whose
    # names begin or end with those characters. Slice the suffix off instead.
    symbol = file_path.name
    if self.file_suffix and symbol.endswith(self.file_suffix):
        symbol = symbol[: -len(self.file_suffix)]
    if symbol.lower() not in self.qlib_symbols:
        return self.NOT_IN_FEATURES
    # qlib data
    qlib_df = D.features([symbol], self.qlib_fields, freq=self.freq)
    qlib_df.rename(columns={_c: _c.strip("$") for _c in qlib_df.columns}, inplace=True)
    # csv data
    origin_df = pd.read_csv(file_path)
    origin_df[self.date_field_name] = pd.to_datetime(origin_df[self.date_field_name])
    if self.symbol_field_name not in origin_df.columns:
        origin_df[self.symbol_field_name] = symbol
    origin_df.set_index([self.symbol_field_name, self.date_field_name], inplace=True)
    # Align index names so datacompy can join on the index.
    origin_df.index.names = qlib_df.index.names
    try:
        compare = datacompy.Compare(
            origin_df,
            qlib_df,
            on_index=True,
            abs_tol=1e-08,  # Optional, defaults to 0
            rel_tol=1e-05,  # Optional, defaults to 0
            df1_name="Original",  # Optional, defaults to 'df1'
            df2_name="New",  # Optional, defaults to 'df2'
        )
        _r = compare.matches(ignore_extra_columns=True)
        return self.COMPARE_TRUE if _r else self.COMPARE_FALSE
    except Exception as e:
        logger.warning(f"{symbol} compare error: {e}")
        return self.COMPARE_ERROR
def instrument_callback(self, ibody, task_uri):
    """Target function for the established process when the received task asks
    for instrument data.

    Call the data provider to acquire data and publish the instrument data.

    Parameters
    ----------
    ibody : dict
        task arguments: instruments, start_time, end_time, freq, as_list
    task_uri : str
        URI identifying the task; used to route the published reply
    """
    instruments = ibody["instruments"]
    start_time = ibody["start_time"]
    end_time = ibody["end_time"]
    # The wire format encodes missing time bounds as the string "None".
    if start_time == "None":
        start_time = None
    if end_time == "None":
        end_time = None
    freq = ibody["freq"]
    as_list = ibody["as_list"]
    status_code = 0
    # TODO: add exceptions detection and modify status_code
    self.logger.debug("process instrument data at %f" % time.time())
    try:
        instrument_result = D.list_instruments(instruments, start_time, end_time, freq, as_list)
        # In dict form each instrument maps to (start, end) span pairs; stringify
        # them so the result can be serialized for publishing.
        if isinstance(instrument_result, dict):
            instrument_result = {i: [(str(s), str(e)) for s, e in t] for i, t in instrument_result.items()}
        self.logger.debug("finish processing instrument data and publish message at %f" % time.time())
        self.publish_message("instrument", instrument_result, status_code, task_uri)
    except Exception as e:
        # Publish a failure reply (status 1) so the requester is not left waiting.
        self.logger.exception(f"Error while processing request %.200s" % e)
        self.publish_message("instrument", None, 1, task_uri, str(e))
def test_no_exist_data(self):
    """Instruments with no PIT data (sh601988) should yield NaN for PIT fields
    while ordinary fields still load.

    NOTE(review): the expected table below was reconstructed from a
    whitespace-mangled source; row content is preserved, exact column alignment
    may differ from the original literal.
    """
    fields = ["P($$roewa_q)", "P($$yoyni_q)", "$close"]
    data = D.features(["sh600519", "sh601988"], fields, start_time="2019-01-01", end_time="2019-07-19", freq="day")
    data["$close"] = 1  # in case of different dataset gives different values
    expect = """
       P($$roewa_q) P($$yoyni_q) $close
instrument datetime
sh600519 2019-01-02 0.25522 0.243892 1
         2019-01-03 0.25522 0.243892 1
         2019-01-04 0.25522 0.243892 1
         2019-01-07 0.25522 0.243892 1
         2019-01-08 0.25522 0.243892 1
... ... ... ...
sh601988 2019-07-15 NaN NaN 1
         2019-07-16 NaN NaN 1
         2019-07-17 NaN NaN 1
         2019-07-18 NaN NaN 1
         2019-07-19 NaN NaN 1

[266 rows x 3 columns]
"""
    self.check_same(data, expect)
def test_exp_06(self):
    """Build 3-second first-difference expressions for each order-book level,
    resample them to 1min means, normalize, and fetch them at tick frequency."""
    t = 3

    def expr6_price_func(name, index, method):
        # 2 * (x_t - x_{t-1}) / t over a t-second resample of the level-`index` field
        return (
            f'2 * (TResample(${name}{index}, "{t}s", "{method}") - Ref(TResample(${name}{index}, "{t}s", "{method}"), 1)) / {t}'
        )

    exprs = []
    names = []
    # price differences per book level, normalized by the level-1 buy+ask sum
    for level in range(1, 11):
        for side in ["bid", "ask"]:
            exprs.append(
                f"TResample({expr6_price_func(side, level, 'last')}, '1min', 'mean') / {self.expr_sum_buy_ask_1}"
            )
            names.append(f"p_diff_{side}{level}_{t}s")
    # size differences per book level, normalized by total volume
    for level in range(1, 11):
        for side in ["asize", "bsize"]:
            exprs.append(
                f"TResample({expr6_price_func(side, level, 'mean')}, '1min', 'mean') / {self.total_volume}"
            )
            names.append(f"v_diff_{side}{level}_{t}s")
    df = D.features(self.stocks_list, fields=exprs, freq="ticks")
    df.columns = names
    print(df)