Ejemplo n.º 1
0
    def test_setting(self):
        """ChangeInstrument evaluates an expression against another instrument.

        All of the queries below are expected to pass.
        """
        # CSI300 index close, evaluated while querying SH600519.
        df = D.features(["SH600519"], ["ChangeInstrument('SH000300', $close)"])

        # Benchmark daily return for "SH600519", spelled two equivalent ways.
        df = D.features(["SH600519"], ["ChangeInstrument('SH000300', Feature('close')/Ref(Feature('close'),1) -1)"])
        df = D.features(["SH600519"], ["ChangeInstrument('SH000300', $close/Ref($close,1) -1)"])
        # Excess return: stock return minus benchmark return.
        excess_query = "($close/Ref($close,1) -1) - ChangeInstrument('SH000300', $close/Ref($close,1) -1)"
        df = D.features(["SH600519"], [excess_query])
        print(df)
Ejemplo n.º 2
0
def load_dataset(market='csi300', start_time='2007-01-01'):
    """Build an Alpha-style feature/label dataset from qlib.

    For each of open/high/low/close/vwap, emits the current bar and the 59
    previous bars, each normalized by the current close; volume is normalized
    by the current volume instead.

    Parameters
    ----------
    market : str
        Instrument universe name understood by ``D.instruments``.
    start_time : str
        First date of data to load (was hard-coded to '2007-01-01').

    Returns
    -------
    (pd.DataFrame, list, list)
        The joined feature+label frame, the feature column names, and the
        label column names.
    """
    # features
    fields = []
    names = []

    # Price features, all normalized by the latest close.
    for price in ('open', 'high', 'low', 'close', 'vwap'):
        fields += ['$%s/$close' % price]  # NOTE: Ref($open, 0) != $open
        fields += ['Ref($%s, %d)/$close' % (price, d) for d in range(1, 60)]
        names += ['%s%d' % (price.upper(), d) for d in range(60)]

    # Volume is normalized by the latest volume (first entry is constantly 1,
    # kept so every block has the same 60-column layout).
    fields += ['$volume/$volume']  # 1
    fields += ['Ref($volume, %d)/$volume' % d for d in range(1, 60)]
    names += ['VOLUME%d' % d for d in range(60)]

    # labels: next-bar vwap return (vwap at t+2 over vwap at t+1)
    labels = ['Ref($vwap, -2)/Ref($vwap, -1)-1']
    label_names = ['LABEL0']

    ## load features
    print('loading features...')
    df = D.features(D.instruments(market), fields, start_time=start_time)
    df.columns = names
    print('load features over')
    ## load labels
    if len(labels):
        print('loading labels...')
        # Labels are loaded for 'all' so instruments outside `market` still align.
        df_labels = D.features(D.instruments('all'), labels, start_time=start_time)
        df_labels.columns = label_names
        df[label_names] = df_labels
        print('load labels over')

    return df, names, label_names
Ejemplo n.º 3
0
 def _compare(self, file_path: Path):
     """Compare one source csv file against the same symbol's qlib features.

     Returns self.NOT_IN_FEATURES, self.COMPARE_TRUE, self.COMPARE_FALSE or
     self.COMPARE_ERROR.
     """
     # BUG FIX: str.strip(suffix) removes any of the suffix's *characters*
     # from both ends (e.g. "cs600000.csv".strip(".csv") also eats the
     # leading "cs"), it does not remove the suffix string. Cut it explicitly.
     symbol = file_path.name
     if self.file_suffix and symbol.endswith(self.file_suffix):
         symbol = symbol[: -len(self.file_suffix)]
     if symbol.lower() not in self.qlib_symbols:
         return self.NOT_IN_FEATURES
     # qlib data; drop the "$" field prefix so columns match the csv header
     qlib_df = D.features([symbol], self.qlib_fields, freq=self.freq)
     qlib_df.rename(columns={_c: _c.strip("$")
                             for _c in qlib_df.columns},
                    inplace=True)
     # csv data
     origin_df = pd.read_csv(file_path)
     origin_df[self.date_field_name] = pd.to_datetime(
         origin_df[self.date_field_name])
     if self.symbol_field_name not in origin_df.columns:
         origin_df[self.symbol_field_name] = symbol
     origin_df.set_index([self.symbol_field_name, self.date_field_name],
                         inplace=True)
     # align index names so datacompy can join on the index
     origin_df.index.names = qlib_df.index.names
     try:
         compare = datacompy.Compare(
             origin_df,
             qlib_df,
             on_index=True,
             abs_tol=1e-08,  # Optional, defaults to 0
             rel_tol=1e-05,  # Optional, defaults to 0
             df1_name="Original",  # Optional, defaults to 'df1'
             df2_name="New",  # Optional, defaults to 'df2'
         )
         _r = compare.matches(ignore_extra_columns=True)
         return self.COMPARE_TRUE if _r else self.COMPARE_FALSE
     except Exception as e:
         logger.warning(f"{symbol} compare error: {e}")
         return self.COMPARE_ERROR
Ejemplo n.º 4
0
    def load_data(self):
        """Smoke-test calendar, instrument and feature queries over 2010-2017."""
        start, end = '2010-01-01', '2017-12-31'
        # First two trading days of the range.
        ret = D.calendar(start_time=start, end_time=end, freq='day')[:2]
        print(ret)

        instruments = D.instruments('csi300')  # e.g. ['SH600570', 'SH600000']
        fields = ['$close', '$volume', 'Ref($close, 1)', 'Mean($close, 3)', '$high-$low']
        data = D.features(instruments, fields, start_time=start, end_time=end, freq='day')
Ejemplo n.º 5
0
 def test_2_dump_features(self):
     """Dump the configured fields, then verify they can be read back via D.features."""
     self.DUMP_DATA.dump_features(include_fields=self.FIELDS)
     df = D.features(self.STOCK_NAMES, self.QLIB_FIELDS)
     # Cache one stock's slice on the class so later test cases can compare against it.
     TestDumpData.SIMPLE_DATA = df.loc(axis=0)[self.STOCK_NAMES[0], :]
     self.assertFalse(df.dropna().empty, "features data failed")
     self.assertListEqual(list(df.columns), self.QLIB_FIELDS,
                          "features columns failed")
Ejemplo n.º 6
0
    def test_0_qlib_data(self):
        """Download the CN dataset and sanity-check csi300 feature loading."""
        GetData().qlib_data_cn(QLIB_DIR)
        frame = D.features(D.instruments("csi300"), self.FIELDS)
        # Column order must match the requested field list exactly.
        self.assertListEqual(list(frame.columns), self.FIELDS,
                             "get qlib data failed")
        # At least one fully-populated row must exist.
        self.assertFalse(frame.dropna().empty, "get qlib data failed")
Ejemplo n.º 7
0
    def load_group_df(
        self,
        instruments,
        exprs: list,
        names: list,
        start_time: Union[str, pd.Timestamp] = None,
        end_time: Union[str, pd.Timestamp] = None,
        gp_name: str = None,
    ) -> pd.DataFrame:
        """Load one group of expressions as a DataFrame with the given column names.

        Parameters
        ----------
        instruments :
            Market name (str), explicit instrument list, or None (falls back to "all").
        exprs : list
            Qlib expression strings to evaluate.
        names : list
            Column names assigned to the evaluated expressions, same order as `exprs`.
        start_time, end_time : Union[str, pd.Timestamp]
            Optional query range.
        gp_name : str
            Group name; selects the per-group freq / inst_processors when those
            are configured as dicts.

        Returns
        -------
        pd.DataFrame
            Indexed <instrument, datetime>, or <datetime, instrument> when
            ``self.swap_level`` is set.
        """
        if instruments is None:
            warnings.warn("`instruments` is not set, will load all stocks")
            instruments = "all"
        if isinstance(instruments, str):
            instruments = D.instruments(instruments, filter_pipe=self.filter_pipe)
        elif self.filter_pipe is not None:
            # filter_pipe only applies when instruments is resolved from a market name
            warnings.warn("`filter_pipe` is not None, but it will not be used with `instruments` as list")

        # freq may be a per-group mapping or a single value shared by all groups
        freq = self.freq[gp_name] if isinstance(self.freq, dict) else self.freq
        df = D.features(
            instruments, exprs, start_time, end_time, freq=freq, inst_processors=self.inst_processor.get(gp_name, [])
        )
        df.columns = names
        if self.swap_level:
            df = df.swaplevel().sort_index()  # NOTE: if swaplevel, return <datetime, instrument>
        return df
Ejemplo n.º 8
0
    def test_query(self):
        """Point-in-time (P) fields should change value on the report publish dates."""
        instruments = ["sh600519"]
        fields = ["P($$roewa_q)", "P($$yoyni_q)"]
        # Mao Tai published 2019Q2 report at 2019-07-13 & 2019-07-18
        # - http://www.cninfo.com.cn/new/commonUrl/pageOfSearch?url=disclosure/list/search&lastPage=index
        data = D.features(instruments, fields, start_time="2019-01-01", end_time="2019-07-19", freq="day")
        # Summary statistics over the whole queried range.
        res = """
               P($$roewa_q)  P($$yoyni_q)
        count    133.000000    133.000000
        mean       0.196412      0.277930
        std        0.097591      0.030262
        min        0.000000      0.243892
        25%        0.094737      0.243892
        50%        0.255220      0.304181
        75%        0.255220      0.305041
        max        0.344644      0.305041
        """
        self.check_same(data.describe(), res)

        # The tail straddles the 2019-07-18 publish date: values switch then.
        res = """
                               P($$roewa_q)  P($$yoyni_q)
        instrument datetime
        sh600519   2019-07-15      0.000000      0.305041
                   2019-07-16      0.000000      0.305041
                   2019-07-17      0.000000      0.305041
                   2019-07-18      0.175322      0.252650
                   2019-07-19      0.175322      0.252650
        """
        self.check_same(data.tail(), res)
Ejemplo n.º 9
0
    def test_no_exist_data(self):
        """Symbols without PIT data (sh601988) should yield NaN for P() fields, not errors."""
        fields = ["P($$roewa_q)", "P($$yoyni_q)", "$close"]
        data = D.features(["sh600519", "sh601988"],
                          fields,
                          start_time="2019-01-01",
                          end_time="2019-07-19",
                          freq="day")
        data[
            "$close"] = 1  # in case of different dataset gives different values
        expect = """
                               P($$roewa_q)  P($$yoyni_q)  $close
        instrument datetime
        sh600519   2019-01-02       0.25522      0.243892       1
                   2019-01-03       0.25522      0.243892       1
                   2019-01-04       0.25522      0.243892       1
                   2019-01-07       0.25522      0.243892       1
                   2019-01-08       0.25522      0.243892       1
        ...                             ...           ...     ...
        sh601988   2019-07-15           NaN           NaN       1
                   2019-07-16           NaN           NaN       1
                   2019-07-17           NaN           NaN       1
                   2019-07-18           NaN           NaN       1
                   2019-07-19           NaN           NaN       1

        [266 rows x 3 columns]
        """
        self.check_same(data, expect)
Ejemplo n.º 10
0
    def test_exp_06(self):
        """Per-level price/size change rates over t-second bars, averaged per minute."""
        t = 3
        # 2 * (x_t - x_{t-1}) / t : scaled discrete derivative of the resampled series
        expr6_price_func = (
            lambda name, index, method:
            f'2 * (TResample(${name}{index}, "{t}s", "{method}") - Ref(TResample(${name}{index}, "{t}s", "{method}"), 1)) / {t}'
        )
        exprs = []
        names = []
        # Price levels bid1..bid10 / ask1..ask10, normalized by the level-1 sum.
        for i in range(1, 11):
            for name in ["bid", "ask"]:
                exprs.append(
                    f"TResample({expr6_price_func(name, i, 'last')}, '1min', 'mean') / {self.expr_sum_buy_ask_1}"
                )
                names.append(f"p_diff_{name}{i}_{t}s")

        # Size levels, normalized by total volume.
        for i in range(1, 11):
            for name in ["asize", "bsize"]:
                exprs.append(
                    f"TResample({expr6_price_func(name, i, 'mean')}, '1min', 'mean') / {self.total_volume}"
                )
                names.append(f"v_diff_{name}{i}_{t}s")

        df = D.features(self.stocks_list, fields=exprs, freq="ticks")
        df.columns = names
        print(df)
Ejemplo n.º 11
0
 def test_pref_operator(self):
     """PRef pins a PIT field to a fixed report period; P follows the latest report."""
     instruments = ["sh600519"]
     fields = [
         "PRef($$roewa_q, 201902)",
         "PRef($$yoyni_q, 201801)",
         "P($$roewa_q)",
         "P($$roewa_q) / PRef($$roewa_q, 201801)",
     ]
     data = D.features(instruments, fields, start_time="2018-04-28", end_time="2019-07-19", freq="day")
     # NOTE(review): "except_data" looks like a typo for "expect_data"; local name only.
     except_data = """
                            PRef($$roewa_q, 201902)  PRef($$yoyni_q, 201801)  P($$roewa_q)  P($$roewa_q) / PRef($$roewa_q, 201801)
     instrument datetime                                                                                                          
     sh600519   2018-05-02                      NaN                 0.395075      0.088887                                1.000000
                2018-05-03                      NaN                 0.395075      0.088887                                1.000000
                2018-05-04                      NaN                 0.395075      0.088887                                1.000000
                2018-05-07                      NaN                 0.395075      0.088887                                1.000000
                2018-05-08                      NaN                 0.395075      0.088887                                1.000000
     ...                                        ...                      ...           ...                                     ...
                2019-07-15                 0.000000                 0.395075      0.000000                                0.000000
                2019-07-16                 0.000000                 0.395075      0.000000                                0.000000
                2019-07-17                 0.000000                 0.395075      0.000000                                0.000000
                2019-07-18                 0.175322                 0.395075      0.175322                                1.972414
                2019-07-19                 0.175322                 0.395075      0.175322                                1.972414
     
     [299 rows x 4 columns]
     """
     self.check_same(data, except_data)
Ejemplo n.º 12
0
 def test_case(instruments, queries, note=None):
     """Run one feature query, echoing an optional note, and return the result."""
     if note:
         print(note)
     print(f"checking {instruments} with queries {queries}")
     result = D.features(instruments, queries)
     print(result)
     return result
Ejemplo n.º 13
0
 def test_expr2(self):
     """P() should accept full nested expressions, and PIT fields should mix with daily ones."""
     instruments = ["sh600519"]
     fields = ["P($$roewa_q)", "P($$yoyni_q)"]
     # arithmetic inside P(): growth of the roewa/yoyni ratio vs. previous period
     fields += ["P(($$roewa_q / $$yoyni_q) / Ref($$roewa_q / $$yoyni_q, 1) - 1)"]
     # aggregation inside P(): trailing 4-period sum
     fields += ["P(Sum($$yoyni_q, 4))"]
     # PIT field combined with an ordinary daily field
     fields += ["$close", "P($$roewa_q) * $close"]
     data = D.features(instruments, fields, start_time="2019-01-01", end_time="2020-01-01", freq="day")
     except_data = """
                                    P($$roewa_q)  P($$yoyni_q)  P(($$roewa_q / $$yoyni_q) / Ref($$roewa_q / $$yoyni_q, 1) - 1)  P(Sum($$yoyni_q, 4))      $close  P($$roewa_q) * $close
     instrument datetime                                                                                                                                                       
     sh600519   2019-01-02      0.255220      0.243892                                           1.484224                           1.661578   63.595333              16.230801
                2019-01-03      0.255220      0.243892                                           1.484224                           1.661578   62.641907              15.987467
                2019-01-04      0.255220      0.243892                                           1.484224                           1.661578   63.915985              16.312637
                2019-01-07      0.255220      0.243892                                           1.484224                           1.661578   64.286530              16.407207
                2019-01-08      0.255220      0.243892                                           1.484224                           1.661578   64.212196              16.388237
     ...                             ...           ...                                                ...                                ...         ...                    ...
                2019-12-25      0.255819      0.219821                                           0.677052                           1.081693  122.150467              31.248409
                2019-12-26      0.255819      0.219821                                           0.677052                           1.081693  122.301315              31.286999
                2019-12-27      0.255819      0.219821                                           0.677052                           1.081693  125.307404              32.056015
                2019-12-30      0.255819      0.219821                                           0.677052                           1.081693  127.763992              32.684456
                2019-12-31      0.255819      0.219821                                           0.677052                           1.081693  127.462303              32.607277
     
     [244 rows x 6 columns]
     """
     self.check_same(data, except_data)
Ejemplo n.º 14
0
 def test_expr(self):
     """Operators wrapped in P() (Mean/Ref/arithmetic) should evaluate point-in-time."""
     fields = [
         "P(Mean($$roewa_q, 1))",
         "P($$roewa_q)",
         "P(Mean($$roewa_q, 2))",
         "P(Ref($$roewa_q, 1))",
         "P((Ref($$roewa_q, 1) +$$roewa_q) / 2)",
     ]
     instruments = ["sh600519"]
     data = D.features(instruments, fields, start_time="2019-01-01", end_time="2019-07-19", freq="day")
     # Mean(x, 2) should equal (Ref(x, 1) + x) / 2 — last two columns must match.
     expect = """
                            P(Mean($$roewa_q, 1))  P($$roewa_q)  P(Mean($$roewa_q, 2))  P(Ref($$roewa_q, 1))  P((Ref($$roewa_q, 1) +$$roewa_q) / 2)
     instrument datetime
     sh600519   2019-07-01               0.094737      0.094737               0.219691              0.344644                               0.219691
                2019-07-02               0.094737      0.094737               0.219691              0.344644                               0.219691
                2019-07-03               0.094737      0.094737               0.219691              0.344644                               0.219691
                2019-07-04               0.094737      0.094737               0.219691              0.344644                               0.219691
                2019-07-05               0.094737      0.094737               0.219691              0.344644                               0.219691
                2019-07-08               0.094737      0.094737               0.219691              0.344644                               0.219691
                2019-07-09               0.094737      0.094737               0.219691              0.344644                               0.219691
                2019-07-10               0.094737      0.094737               0.219691              0.344644                               0.219691
                2019-07-11               0.094737      0.094737               0.219691              0.344644                               0.219691
                2019-07-12               0.094737      0.094737               0.219691              0.344644                               0.219691
                2019-07-15               0.000000      0.000000               0.047369              0.094737                               0.047369
                2019-07-16               0.000000      0.000000               0.047369              0.094737                               0.047369
                2019-07-17               0.000000      0.000000               0.047369              0.094737                               0.047369
                2019-07-18               0.175322      0.175322               0.135029              0.094737                               0.135029
                2019-07-19               0.175322      0.175322               0.135029              0.094737                               0.135029
     """
     self.check_same(data.tail(15), expect)
Ejemplo n.º 15
0
 def testClose(self):
     """Daily close-to-close moves for csi300 should stay within +/-10% at the deciles."""
     returns = D.features(D.instruments("csi300"),
                          ["Ref($close, 1)/$close - 1"])
     summary = returns.describe(percentiles=np.arange(0.1, 1.0, 0.1))
     print(summary)
     # Check the 90th then the 10th percentile, same order as before.
     for decile in ("90%", "10%"):
         self.assertLessEqual(abs(summary.loc[decile][0]), 0.1,
                              "Close value is abnormal")
Ejemplo n.º 16
0
    def test_3_dump_features_simple(self):
        """Dumping a single stock's csv should reproduce the data cached by the earlier dump test."""
        stock = self.STOCK_NAMES[0]
        dump_data = DumpData(csv_path=SOURCE_DIR.joinpath(f"{stock.lower()}.csv"), qlib_dir=QLIB_DIR)
        dump_data.dump_features(include_fields=self.FIELDS, calendar_path=QLIB_DIR.joinpath("calendars", "day.txt"))

        df = D.features([stock], self.QLIB_FIELDS)

        # SIMPLE_DATA was cached by test_2_dump_features for this same stock.
        self.assertEqual(len(df), len(TestDumpData.SIMPLE_DATA), "dump features simple failed")
        self.assertTrue(np.isclose(df.dropna(), self.SIMPLE_DATA.dropna()).all(), "dump features simple failed")
Ejemplo n.º 17
0
    def _get_old_data(self, qlib_data_dir: [str, Path]):
        """Load raw close and first-close ratio from an existing qlib data dir.

        NOTE(review): `[str, Path]` is not valid typing syntax; `Union[str, Path]`
        was presumably intended — confirm before changing.

        Returns a DataFrame with columns [self._ori_close_field, self._first_close_field].
        """
        import qlib
        from qlib.data import D

        qlib_data_dir = str(Path(qlib_data_dir).expanduser().resolve())
        # Disable caches so the values reflect the on-disk data directly.
        qlib.init(provider_uri=qlib_data_dir, expression_cache=None, dataset_cache=None)
        # $close/$factor undoes the adjustment factor; $adjclose/$close recovers it.
        df = D.features(D.instruments("all"), ["$close/$factor", "$adjclose/$close"])
        df.columns = [self._ori_close_field, self._first_close_field]
        return df
Ejemplo n.º 18
0
 def test_basic01(self):
     """Resample tick-level ask1 quotes to 1-minute bars (last value per bar)."""
     frame = D.features(
         self.stocks_list,
         fields=["TResample($ask1, '1min', 'last')"],
         freq="ticks",
         start_time="20201230",
         end_time="20210101",
     )
     print(frame)
Ejemplo n.º 19
0
 def test_basic03(self):
     """Fetch the raw order-stream function codes for the test window."""
     query_args = dict(
         fields=["$function_code"],
         freq="order",
         start_time="20201230",
         end_time="20210101",
     )
     orders = D.features(self.stocks_list, **query_args)
     print(orders)
Ejemplo n.º 20
0
 def test_regiter_custom_ops(self):
     """Custom operators (Diff, Distance) should be usable inside qlib expressions.

     NOTE(review): "regiter" looks like a typo for "register"; left unchanged
     because the method name is the public test identifier.
     """
     instruments = ["SH600000"]
     fields = ["Diff($close)", "Distance($close, Ref($close, 1))"]
     print(
         D.features(instruments,
                    fields,
                    start_time="2010-01-01",
                    end_time="2017-12-31",
                    freq="day"))
Ejemplo n.º 21
0
 def test_exp_09_trans(self):
     """Transaction intensity: 3s change of a resampled count series, averaged per minute.

     NOTE(review): `expr7_3_init` is defined elsewhere; "Gt"/"Lt" presumably
     split the stream into two sides (buy/sell) — confirm against its definition.
     """
     exprs = [
         f'TResample(Div(Sub(TResample({self.expr7_3_init("C", "Gt", "3")}, "3s", "last"), Ref(TResample({self.expr7_3_init("C", "Gt", "3")}, "3s","last"), 1)), 3), "1min", "mean")',
         f'TResample(Div(Sub(TResample({self.expr7_3_init("C", "Lt", "3")}, "3s", "last"), Ref(TResample({self.expr7_3_init("C", "Lt", "3")}, "3s","last"), 1)), 3), "1min", "mean")',
     ]
     names = ["ca_diff_intensity_3s_3s", "cb_diff_intensity_3s_3s"]
     df = D.features(self.stocks_list, fields=exprs, freq="transaction")
     df.columns = names
     print(df)
Ejemplo n.º 22
0
 def test_basic(self):
     """Load raw level-1/level-2 quotes for the test window.

     NOTE: this data contains a lot of zeros in $askX and $bidX.
     """
     quote_fields = ["$ask1", "$ask2", "$bid1", "$bid2"]
     quotes = D.features(
         self.stocks_list,
         fields=quote_fields,
         freq="ticks",
         start_time="20201230",
         end_time="20210101",
     )
     print(quotes)
Ejemplo n.º 23
0
    def test_exp_05(self):
        """Accumulated ask-minus-bid spread in price and in size, each normalized."""
        exprs = [
            # price spread, normalized by the level-1 buy+ask sum
            f"2 * Sub({ self.total_func('ask', 'last')}, {self.total_func('bid', 'last')})/{self.expr_sum_buy_ask_1}",
            # size imbalance, normalized by total volume
            f"Sub({ self.total_func('asize', 'mean')}, {self.total_func('bsize', 'mean')})/{self.total_volume}",
        ]
        names = ["p_accspread", "v_accspread"]

        df = D.features(self.stocks_list, fields=exprs, freq="ticks")
        df.columns = names
        print(df)
Ejemplo n.º 24
0
    def test_exp_04(self):
        """Average book size per side (asize/bsize), normalized by total volume."""
        sides = ["asize", "bsize"]
        # /10 averages over the ten book levels combined by total_func.
        exprs = [f"(({ self.total_func(name, 'mean')}) / 10) / {self.total_volume}" for name in sides]
        names = [f"v_avg_{name}" for name in sides]

        df = D.features(self.stocks_list, fields=exprs, freq="ticks")
        df.columns = names
        print(df)
Ejemplo n.º 25
0
 def test_exp_01(self):
     """Per-level book sizes resampled to 1 minute, normalized by total volume."""
     sides = ["asize", "bsize"]
     levels = range(1, 11)
     # Same ordering as before: all asize levels first, then all bsize levels.
     exprs = [
         f"TResample(${name}{i}, '1min', 'mean') / ({self.total_volume})"
         for name in sides
         for i in levels
     ]
     names = [f"v_{name}_{i}" for name in sides for i in levels]
     df = D.features(self.stocks_list, fields=exprs, freq="ticks")
     df.columns = names
     print(df)
Ejemplo n.º 26
0
    def _get_all_1d_data(self):
        """Return daily paused/volume/factor/close data for every instrument, flat-indexed."""
        import qlib
        from qlib.data import D

        qlib.init(provider_uri=self.qlib_data_1d_dir)
        daily = D.features(D.instruments("all"), ["$paused", "$volume", "$factor", "$close"], freq="day")
        daily.reset_index(inplace=True)
        daily.rename(columns={"datetime": self._date_field_name, "instrument": self._symbol_field_name}, inplace=True)
        # "$close" -> "close": strip the qlib field prefix from the data columns.
        daily.columns = [col[1:] if col.startswith("$") else col for col in daily.columns]
        return daily
Ejemplo n.º 27
0
def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"):
    """Estimate and dump daily risk-model data (factor exposures, factor
    covariance, specific risk) for each trading day with a full T-day lookback.

    Parameters
    ----------
    riskdata_root : str
        Output root; one sub-directory per date (YYYYMMDD) is created.
    T : int
        Lookback window length in trading days.
    start_time : str
        Earliest date of price data to load.
    """
    # csi300 membership over time, indexed <date, instrument>
    universe = D.features(D.instruments("csi300"), ["$close"],
                          start_time=start_time).swaplevel().sort_index()

    # close prices for ALL instruments, reshaped to dates x instruments
    price_all = (D.features(
        D.instruments("all"), ["$close"],
        start_time=start_time).squeeze().unstack(level="instrument"))

    # StructuredCovEstimator is a statistical risk model
    riskmodel = StructuredCovEstimator()

    # start at the first date with a complete T-day history
    for i in range(T - 1, len(price_all)):

        date = price_all.index[i]
        ref_date = price_all.index[i - T + 1]

        print(date)

        # restrict to the instruments that are in the universe on `date`
        codes = universe.loc[date].index
        price = price_all.loc[ref_date:date, codes]

        # calculate return and remove extreme return
        ret = price.pct_change()
        ret.clip(ret.quantile(0.025),
                 ret.quantile(0.975),
                 axis=1,
                 inplace=True)

        # run risk model
        F, cov_b, var_u = riskmodel.predict(ret,
                                            is_price=False,
                                            return_decomposed_components=True)

        # save risk data
        root = riskdata_root + "/" + date.strftime("%Y%m%d")
        os.makedirs(root, exist_ok=True)

        pd.DataFrame(F, index=codes).to_pickle(root + "/factor_exp.pkl")
        pd.DataFrame(cov_b).to_pickle(root + "/factor_cov.pkl")
        # for specific_risk we follow the convention to save volatility
        pd.Series(np.sqrt(var_u),
                  index=codes).to_pickle(root + "/specific_risk.pkl")
Ejemplo n.º 28
0
def fill_1min_using_1d(
    data_1min_dir: [str, Path],
    qlib_data_1d_dir: [str, Path],
    max_workers: int = 16,
    date_field_name: str = "date",
    symbol_field_name: str = "symbol",
):
    """Use 1d data to fill in the missing symbols relative to 1min

    Writes one placeholder csv (all-NaN values, paused_num=0) per missing
    symbol into ``data_1min_dir``.

    NOTE(review): `[str, Path]` is not valid typing syntax; `Union[str, Path]`
    was presumably intended.

    Parameters
    ----------
    data_1min_dir: str
        1min data dir
    qlib_data_1d_dir: str
        1d qlib data(bin data) dir, from: https://qlib.readthedocs.io/en/latest/component/data.html#converting-csv-format-into-qlib-format
    max_workers: int
        ThreadPoolExecutor(max_workers), by default 16
    date_field_name: str
        date field name, by default date
    symbol_field_name: str
        symbol field name, by default symbol

    """
    data_1min_dir = Path(data_1min_dir).expanduser().resolve()
    qlib_data_1d_dir = Path(qlib_data_1d_dir).expanduser().resolve()

    min_date, max_date = get_date_range(data_1min_dir, max_workers, date_field_name)
    symbols_1min = get_symbols(data_1min_dir)

    qlib.init(provider_uri=str(qlib_data_1d_dir))
    data_1d = D.features(D.instruments("all"), ["$close"], min_date, max_date, freq="day")

    # symbols present in the 1d data but absent from the 1min csvs
    miss_symbols = set(data_1d.index.get_level_values(level="instrument").unique()) - set(symbols_1min)
    if not miss_symbols:
        logger.warning("More symbols in 1min than 1d, no padding required")
        return

    logger.info(f"miss_symbols  {len(miss_symbols)}: {miss_symbols}")
    # Use one existing csv as a template for columns and symbol-case convention.
    tmp_df = pd.read_csv(list(data_1min_dir.glob("*.csv"))[0])
    columns = tmp_df.columns
    _si = tmp_df[symbol_field_name].first_valid_index()
    is_lower = tmp_df.loc[_si][symbol_field_name].islower()
    for symbol in tqdm(miss_symbols):
        if is_lower:
            symbol = symbol.lower()
        # the 1d index stores symbols upper-case
        index_1d = data_1d.loc(axis=0)[symbol.upper()].index
        # expand daily dates into the per-minute trading calendar
        index_1min = generate_minutes_calendar_from_daily(index_1d)
        index_1min.name = date_field_name
        _df = pd.DataFrame(columns=columns, index=index_1min)
        if date_field_name in _df.columns:
            del _df[date_field_name]
        _df.reset_index(inplace=True)
        _df[symbol_field_name] = symbol
        _df["paused_num"] = 0
        _df.to_csv(data_1min_dir.joinpath(f"{symbol}.csv"), index=False)
Ejemplo n.º 29
0
 def test_exp_10(self):
     """1-minute mean of the i-second-ahead mid-quote change rate, for several horizons."""
     exprs = []
     names = []
     for i in [5, 10, 30, 60]:
         # Ref with a negative lag looks i seconds into the future of the
         # 1s-resampled (ask1+bid1) series.
         exprs.append(
             f'TResample(Ref(TResample($ask1 + $bid1, "1s", "ffill"), {-i}) / TResample($ask1 + $bid1, "1s", "ffill") - 1, "1min", "mean" )'
         )
         # BUG FIX: the original appended a *generator object*
         # (f"..." for i in [5, 10, 30, 60]) instead of the formatted string,
         # so the column names were unusable generator reprs.
         names.append(f"lag_{i}_change_rate")
     df = D.features(self.stocks_list, fields=exprs, freq="ticks")
     df.columns = names
     print(df)
Ejemplo n.º 30
0
    def test_0_qlib_data(self):
        """Download the simplified CN daily dataset and sanity-check csi300 features."""
        GetData().qlib_data(
            name="qlib_data_simple",
            target_dir=QLIB_DIR,
            region="cn",
            interval="1d",
            version="latest",
        )
        frame = D.features(D.instruments("csi300"), self.FIELDS)
        # Columns must come back in the requested order, with usable data.
        self.assertListEqual(list(frame.columns), self.FIELDS,
                             "get qlib data failed")
        self.assertFalse(frame.dropna().empty, "get qlib data failed")