コード例 #1
0
ファイル: test_sql_engine.py プロジェクト: rlcjj/alpha-mind
    def test_sql_engine_fetch_risk_model(self):
        ref_date = self.ref_date
        universe = Universe('zz500') + Universe('zz1000')
        codes = self.engine.fetch_codes(ref_date, universe)

        risk_cov, risk_exp = self.engine.fetch_risk_model(ref_date,
                                                          codes,
                                                          risk_model='short')
        self.assertListEqual(risk_cov.Factor.tolist(),
                             risk_cov.columns[2:].tolist())

        query = select([RiskCovShort
                        ]).where(RiskCovShort.trade_date == ref_date)

        cov_df = pd.read_sql(query,
                             con=self.engine.engine).sort_values('FactorID')
        factors = cov_df.Factor.tolist()
        np.testing.assert_array_almost_equal(risk_cov[factors].values,
                                             cov_df[factors].values)

        query = select([RiskExposure]).where(
            and_(RiskExposure.trade_date == ref_date,
                 RiskExposure.code.in_(codes)))

        exp_df = pd.read_sql(query, con=self.engine.engine)
        np.testing.assert_array_almost_equal(exp_df[factors].values,
                                             risk_exp[factors].values)
コード例 #2
0
    def fetch_industry_range(self,
                             universe: Universe,
                             start_date: str = None,
                             end_date: str = None,
                             dates: Iterable[str] = None,
                             category: str = 'sw',
                             level: int = 1):
        industry_category_name = _map_industry_category(category)
        cond = universe._query_statements(start_date, end_date, dates)

        big_table = join(
            Industry, UniverseTable,
            and_(Industry.trade_date == UniverseTable.trade_date,
                 Industry.code == UniverseTable.code,
                 Industry.industry == industry_category_name, cond))

        code_name = 'industryID' + str(level)
        category_name = 'industryName' + str(level)

        query = select([
            Industry.trade_date, Industry.code,
            getattr(Industry, code_name).label('industry_code'),
            getattr(Industry, category_name).label('industry')
        ]).select_from(big_table).distinct()

        df = pd.read_sql(query, self.engine).dropna()
        if universe.is_filtered:
            codes = universe.query(self, start_date, end_date, dates)
            df = pd.merge(df, codes, how='inner',
                          on=['trade_date',
                              'code']).sort_values(['trade_date', 'code'])
        return df.drop_duplicates(['trade_date', 'code'])
コード例 #3
0
ファイル: test_sql_engine.py プロジェクト: rlcjj/alpha-mind
    def test_sql_engine_fetch_dx_return_range(self):
        ref_dates = makeSchedule(
            advanceDateByCalendar('china.sse', self.ref_date, '-6m'),
            self.ref_date, '60b', 'china.sse')
        universe = Universe('zz500') + Universe('zz1000')

        dx_return = self.engine.fetch_dx_return_range(universe,
                                                      dates=ref_dates,
                                                      horizon=4,
                                                      offset=1)

        codes = self.engine.fetch_codes_range(universe, dates=ref_dates)
        groups = codes.groupby('trade_date')

        for ref_date, g in groups:
            start_date = advanceDateByCalendar('china.sse', ref_date, '2b')
            end_date = advanceDateByCalendar('china.sse', ref_date, '6b')

            query = select([Market.code, Market.chgPct]).where(
                and_(Market.trade_date.between(start_date, end_date),
                     Market.code.in_(g.code.unique().tolist())))

            df = pd.read_sql(query, con=self.engine.engine)
            res = df.groupby('code').apply(lambda x: np.log(1. + x).sum())
            calculated_return = dx_return[dx_return.trade_date == ref_date]
            np.testing.assert_array_almost_equal(calculated_return.dx.values,
                                                 res.chgPct.values)
コード例 #4
0
ファイル: test_sql_engine.py プロジェクト: rlcjj/alpha-mind
    def test_sql_engine_fetch_factor_range_forward(self):
        ref_dates = makeSchedule(
            advanceDateByCalendar('china.sse', self.ref_date, '-6m'),
            self.ref_date, '60b', 'china.sse')
        ref_dates = ref_dates + [
            advanceDateByCalendar('china.sse', ref_dates[-1],
                                  '60b').strftime('%Y-%m-%d')
        ]
        universe = Universe('zz500') + Universe('zz1000')
        factor = 'ROE'

        factor_data = self.engine.fetch_factor_range_forward(universe,
                                                             factor,
                                                             dates=ref_dates)

        codes = self.engine.fetch_codes_range(universe, dates=ref_dates[:-1])
        groups = codes.groupby('trade_date')

        for ref_date, g in groups:
            forward_ref_date = advanceDateByCalendar(
                'china.sse', ref_date, '60b').strftime('%Y-%m-%d')
            query = select([Uqer.code, Uqer.ROE]).where(
                and_(Uqer.trade_date == forward_ref_date,
                     Uqer.code.in_(g.code.unique().tolist())))

            df = pd.read_sql(query, con=self.engine.engine)
            calculated_factor = factor_data[factor_data.trade_date == ref_date]
            calculated_factor.set_index('code', inplace=True)
            calculated_factor = calculated_factor.loc[df.code]
            np.testing.assert_array_almost_equal(calculated_factor.dx.values,
                                                 df.ROE.values)
コード例 #5
0
    def test_universe_equal(self):
        universe1 = Universe('zz500')
        universe2 = Universe('zz500')
        self.assertEqual(universe1, universe2)

        universe1 = Universe('zz500')
        universe2 = Universe('zz800')
        self.assertNotEqual(universe1, universe2)
コード例 #6
0
    def fetch_risk_model_range(
            self,
            universe: Universe,
            start_date: str = None,
            end_date: str = None,
            dates: Iterable[str] = None,
            risk_model: str = 'short',
            excluded: Iterable[str] = None
    ) -> Tuple[pd.DataFrame, pd.DataFrame]:

        risk_cov_table, special_risk_col = _map_risk_model_table(risk_model)

        cov_risk_cols = [
            risk_cov_table.__table__.columns[f] for f in total_risk_factors
        ]

        cond = risk_cov_table.trade_date.in_(
            dates) if dates else risk_cov_table.trade_date.between(
                start_date, end_date)
        query = select([
            risk_cov_table.trade_date, risk_cov_table.FactorID,
            risk_cov_table.Factor
        ] + cov_risk_cols).where(cond)

        risk_cov = pd.read_sql(query, self.engine).sort_values(
            ['trade_date', 'FactorID'])

        if not excluded:
            excluded = []

        risk_exposure_cols = [
            FullFactor.__table__.columns[f] for f in total_risk_factors
            if f not in set(excluded)
        ]

        cond = universe._query_statements(start_date, end_date, dates)

        big_table = join(
            FullFactor, UniverseTable,
            and_(FullFactor.trade_date == UniverseTable.trade_date,
                 FullFactor.code == UniverseTable.code, cond))

        query = select(
            [FullFactor.trade_date, FullFactor.code, special_risk_col] + risk_exposure_cols).select_from(big_table) \
            .distinct()

        risk_exp = pd.read_sql(query, self.engine)

        if universe.is_filtered:
            codes = universe.query(self, start_date, end_date, dates)
            risk_exp = pd.merge(risk_exp,
                                codes,
                                how='inner',
                                on=['trade_date', 'code'
                                    ]).sort_values(['trade_date', 'code'])

        return risk_cov, risk_exp
コード例 #7
0
ファイル: test_sql_engine.py プロジェクト: rlcjj/alpha-mind
    def test_sdl_engine_fetch_codes_with_exclude_universe(self):
        ref_date = self.ref_date
        universe = Universe('zz500') - Universe('cyb')
        codes = self.engine.fetch_codes(ref_date, universe)

        query = select([UniverseTable.code]).where(
            and_(UniverseTable.trade_date == ref_date,
                 UniverseTable.zz500 == 1, UniverseTable.cyb == 0))

        df = pd.read_sql(query, con=self.engine.engine).sort_values('code')
        self.assertListEqual(codes, list(df.code.values))
コード例 #8
0
ファイル: test_sql_engine.py プロジェクト: rlcjj/alpha-mind
    def test_sql_engine_fetch_factor(self):
        ref_date = self.ref_date
        universe = Universe('zz500') + Universe('zz1000')
        codes = self.engine.fetch_codes(ref_date, universe)
        factor = 'ROE'

        factor_data = self.engine.fetch_factor(ref_date, factor, codes)

        query = select([Uqer.code, Uqer.ROE]).where(
            and_(Uqer.trade_date == ref_date, Uqer.code.in_(codes)))

        df = pd.read_sql(query, con=self.engine.engine).sort_values('code')
        np.testing.assert_array_almost_equal(factor_data.ROE.values,
                                             df.ROE.values)
コード例 #9
0
    def fetch_industry_range(self,
                             universe: Universe,
                             start_date: str = None,
                             end_date: str = None,
                             dates: Iterable[str] = None,
                             category: str = 'sw',
                             level: int = 1):
        code_name = 'industry_code' + str(level)
        category_name = 'industry_name' + str(level)

        cond = universe._query_statements(start_date, end_date, dates)

        query = select([Industry.code.label("code"),
                        Industry.trade_date,
                        getattr(Industry, code_name).label('industry_code'),
                        getattr(Industry, category_name).label('industry')]).where(
            and_(
                *cond,
                Industry.code == UniverseTable.code,
                Industry.trade_date == UniverseTable.trade_date,
                Industry.flag == 1
            )
        ).distinct()

        df = pd.read_sql(query, self.session.bind)
        df["trade_date"] = pd.to_datetime(df["trade_date"])
        return df
コード例 #10
0
ファイル: test_composer.py プロジェクト: zhibzeng/alpha-mind
    def test_composer_persistence(self):
        freq = '5b'
        universe = Universe('zz800')
        batch = 4
        neutralized_risk = ['SIZE']
        risk_model = 'long'
        pre_process = ['standardize', 'winsorize_normal']
        post_process = ['standardize', 'winsorize_normal']
        warm_start = 2
        data_source = 'postgresql://*****:*****@server/dummy'

        data_meta = DataMeta(freq=freq,
                             universe=universe,
                             batch=batch,
                             neutralized_risk=neutralized_risk,
                             risk_model=risk_model,
                             pre_process=pre_process,
                             post_process=post_process,
                             warm_start=warm_start,
                             data_source=data_source)

        features = {'f1': 'closePrice', 'f2': 'openPrice'}
        alpha_model = XGBClassifier(features=features)

        composer = Composer(alpha_model=alpha_model,
                            data_meta=data_meta)

        comp_desc = composer.save()
        loaded_comp = Composer.load(comp_desc)
        self._assert_composer_equal(composer, loaded_comp)
コード例 #11
0
ファイル: test_composer.py プロジェクト: zhibzeng/alpha-mind
    def test_data_meta_persistence(self):

        freq = '5b'
        universe = Universe('zz800')
        batch = 4
        neutralized_risk = ['SIZE']
        risk_model = 'long'
        pre_process = ['standardize', 'winsorize_normal']
        post_process = ['standardize', 'winsorize_normal']
        warm_start = 2
        data_source = 'postgresql://*****:*****@server/dummy'

        data_meta = DataMeta(freq=freq,
                             universe=universe,
                             batch=batch,
                             neutralized_risk=neutralized_risk,
                             risk_model=risk_model,
                             pre_process=pre_process,
                             post_process=post_process,
                             warm_start=warm_start,
                             data_source=data_source)

        data_desc = data_meta.save()

        loaded_data = DataMeta.load(data_desc)
        self.assertEqual(data_meta.freq, loaded_data.freq)
        self.assertEqual(data_meta.universe, loaded_data.universe)
        self.assertEqual(data_meta.batch, loaded_data.batch)
        self.assertEqual(data_meta.neutralized_risk, loaded_data.neutralized_risk)
        self.assertEqual(data_meta.risk_model, loaded_data.risk_model)
        self.assertEqual(data_meta.pre_process, loaded_data.pre_process)
        self.assertEqual(data_meta.post_process, loaded_data.post_process)
        self.assertEqual(data_meta.warm_start, loaded_data.warm_start)
        self.assertEqual(data_meta.data_source, loaded_data.data_source)
コード例 #12
0
ファイル: sqlengine.py プロジェクト: GehirnW/alpha-mind
    def fetch_industry_range(self,
                             universe: Universe,
                             start_date: str = None,
                             end_date: str = None,
                             dates: Iterable[str] = None,
                             category: str = 'sw'):
        industry_category_name = _map_industry_category(category)
        cond = universe.query_range(start_date, end_date, dates)

        if dates:
            big_table = join(
                Industry, UniverseTable,
                and_(Industry.trade_date == UniverseTable.trade_date,
                     Industry.code == UniverseTable.code,
                     Industry.industry == industry_category_name,
                     Industry.trade_date.in_(dates), cond))
        else:
            big_table = join(
                Industry, UniverseTable,
                and_(Industry.trade_date == UniverseTable.trade_date,
                     Industry.code == UniverseTable.code,
                     Industry.industry == industry_category_name,
                     Industry.trade_date.between(start_date, end_date), cond))

        query = select([
            Industry.trade_date, Industry.code,
            Industry.industryID1.label('industry_code'),
            Industry.industryName1.label('industry')
        ]).select_from(big_table).distinct()
        return pd.read_sql(query, self.engine)
コード例 #13
0
ファイル: sqlengine.py プロジェクト: GehirnW/alpha-mind
 def fetch_codes(self, ref_date: str, universe: Universe) -> List[int]:
     cond = universe.query(ref_date)
     query = select([UniverseTable.trade_date,
                     UniverseTable.code]).distinct().where(cond)
     cursor = self.engine.execute(query)
     codes_set = {c[1] for c in cursor.fetchall()}
     return sorted(codes_set)
コード例 #14
0
ファイル: sqlengine.py プロジェクト: zhibzeng/alpha-mind
    def fetch_industry_range(self,
                             universe: Universe,
                             start_date: str = None,
                             end_date: str = None,
                             dates: Iterable[str] = None,
                             category: str = 'sw',
                             level: int = 1):
        industry_category_name = _map_industry_category(category)
        cond = universe._query_statements(start_date, end_date, dates)

        big_table = join(Industry, UniverseTable,
                         and_(
                             Industry.trade_date == UniverseTable.trade_date,
                             Industry.code == UniverseTable.code,
                             Industry.industry == industry_category_name,
                             cond
                         ))

        code_name = 'industryID' + str(level)
        category_name = 'industryName' + str(level)

        query = select([Industry.trade_date,
                        Industry.code,
                        getattr(Industry, code_name).label('industry_code'),
                        getattr(Industry, category_name).label('industry')]).select_from(big_table)\
            .order_by(Industry.trade_date, Industry.code)

        return pd.read_sql(query, self.engine).dropna()
コード例 #15
0
ファイル: sqlengine.py プロジェクト: zhibzeng/alpha-mind
    def fetch_factor_range(self,
                           universe: Universe,
                           factors: Union[Transformer, Iterable[object]],
                           start_date: str = None,
                           end_date: str = None,
                           dates: Iterable[str] = None,
                           external_data: pd.DataFrame = None,
                           used_factor_tables=None) -> pd.DataFrame:

        if isinstance(factors, Transformer):
            transformer = factors
        else:
            transformer = Transformer(factors)

        dependency = transformer.dependency

        if used_factor_tables:
            factor_cols = _map_factors(dependency, used_factor_tables)
        else:
            factor_cols = _map_factors(dependency, factor_tables)

        big_table = Market
        joined_tables = set()
        joined_tables.add(Market.__table__.name)

        for t in set(factor_cols.values()):
            if t.__table__.name not in joined_tables:
                if dates is not None:
                    big_table = outerjoin(big_table, t, and_(Market.trade_date == t.trade_date,
                                                             Market.code == t.code,
                                                             Market.trade_date.in_(dates)))
                else:
                    big_table = outerjoin(big_table, t, and_(Market.trade_date == t.trade_date,
                                                             Market.code == t.code,
                                                             Market.trade_date.between(start_date, end_date)))
                joined_tables.add(t.__table__.name)

        universe_df = universe.query(self, start_date, end_date, dates)

        query = select(
            [Market.trade_date, Market.code, Market.chgPct] + list(factor_cols.keys())) \
            .select_from(big_table).where(
                and_(
                    Market.code.in_(universe_df.code.unique().tolist()),
                    Market.trade_date.in_(dates) if dates is not None else Market.trade_date.between(start_date, end_date)
                )
        ).distinct()

        df = pd.read_sql(query, self.engine).replace([-np.inf, np.inf], np.nan)

        if external_data is not None:
            df = pd.merge(df, external_data, on=['trade_date', 'code']).dropna()

        df.sort_values(['trade_date', 'code'], inplace=True)
        df.set_index('trade_date', inplace=True)
        res = transformer.transform('code', df).replace([-np.inf, np.inf], np.nan)

        res['chgPct'] = df.chgPct
        res = res.reset_index()
        return pd.merge(res, universe_df[['trade_date', 'code']], how='inner').drop_duplicates(['trade_date', 'code'])
コード例 #16
0
ファイル: sqlengine.py プロジェクト: zhibzeng/alpha-mind
    def fetch_trade_status_range(self,
                                 universe: Universe,
                                 start_date: str = None,
                                 end_date: str = None,
                                 dates: Iterable[str] = None,
                                 offset=0):
        codes = universe.query(self, start_date, end_date, dates)

        if dates:
            start_date = dates[0]
            end_date = dates[-1]

        end_date = advanceDateByCalendar('china.sse', end_date,
                                         str(offset) + 'b').strftime('%Y-%m-%d')

        stats = func.lead(Market.isOpen, offset).over(
            partition_by=Market.code,
            order_by=Market.trade_date).label('is_open')
        cte = select([Market.trade_date, Market.code, stats]).where(
            and_(
                Market.trade_date.between(start_date, end_date),
                Market.code.in_(codes.code.unique().tolist())
            )
        ).cte('cte')

        query = select([cte]).select_from(cte).order_by(cte.columns['trade_date'], cte.columns['code'])
        df = pd.read_sql(query, self.engine)

        return pd.merge(df, codes[['trade_date', 'code']], on=['trade_date', 'code'])
コード例 #17
0
ファイル: test_sql_engine.py プロジェクト: rlcjj/alpha-mind
    def test_sql_engine_fetch_industry_matrix(self):
        ref_date = self.ref_date
        universe = Universe('zz500') + Universe('zz1000')
        codes = self.engine.fetch_codes(ref_date, universe)

        ind_matrix = self.engine.fetch_industry_matrix(ref_date, codes, 'sw',
                                                       1)

        query = select([Industry.code, Industry.industryName1]).where(
            and_(Industry.trade_date == ref_date,
                 Industry.industry == '申万行业分类', Industry.code.in_(codes)))

        df = pd.read_sql(query, con=self.engine.engine)
        df = pd.get_dummies(df, prefix="", prefix_sep="")

        self.assertEqual(len(ind_matrix), len(df))
        np.testing.assert_array_almost_equal(df[ind_matrix.columns[2:]].values,
                                             ind_matrix.iloc[:, 2:].values)
コード例 #18
0
ファイル: sqlengine.py プロジェクト: GehirnW/alpha-mind
 def fetch_codes_range(self,
                       universe: Universe,
                       start_date: str = None,
                       end_date: str = None,
                       dates: Iterable[str] = None) -> pd.DataFrame:
     cond = universe.query_range(start_date, end_date, dates)
     query = select([UniverseTable.trade_date,
                     UniverseTable.code]).distinct().where(cond)
     return pd.read_sql(query, self.engine)
コード例 #19
0
ファイル: test_sql_engine.py プロジェクト: rlcjj/alpha-mind
    def test_sql_engine_fetch_dx_return(self):
        horizon = 4
        offset = 1
        ref_date = self.ref_date
        universe = Universe('zz500') + Universe('zz1000')
        codes = self.engine.fetch_codes(ref_date, universe)

        dx_return = self.engine.fetch_dx_return(ref_date,
                                                codes,
                                                horizon=horizon,
                                                offset=offset)
        start_date = advanceDateByCalendar('china.sse', ref_date, '2b')
        end_date = advanceDateByCalendar('china.sse', ref_date, '6b')

        query = select([Market.code, Market.chgPct]).where(
            and_(Market.trade_date.between(start_date, end_date),
                 Market.code.in_(dx_return.code.unique().tolist())))

        df = pd.read_sql(query, con=self.engine.engine)
        res = df.groupby('code').apply(lambda x: np.log(1. + x).sum())
        np.testing.assert_array_almost_equal(dx_return.dx.values,
                                             res.chgPct.values)

        horizon = 4
        offset = 0
        ref_date = self.ref_date
        universe = Universe('zz500') + Universe('zz1000')
        codes = self.engine.fetch_codes(ref_date, universe)

        dx_return = self.engine.fetch_dx_return(ref_date,
                                                codes,
                                                horizon=horizon,
                                                offset=offset)
        start_date = advanceDateByCalendar('china.sse', ref_date, '1b')
        end_date = advanceDateByCalendar('china.sse', ref_date, '5b')

        query = select([Market.code, Market.chgPct]).where(
            and_(Market.trade_date.between(start_date, end_date),
                 Market.code.in_(dx_return.code.unique().tolist())))

        df = pd.read_sql(query, con=self.engine.engine)
        res = df.groupby('code').apply(lambda x: np.log(1. + x).sum())
        np.testing.assert_array_almost_equal(dx_return.dx.values,
                                             res.chgPct.values)
コード例 #20
0
ファイル: test_sql_engine.py プロジェクト: rlcjj/alpha-mind
    def test_sql_engine_fetch_codes_range(self):
        ref_dates = makeSchedule(
            advanceDateByCalendar('china.sse', self.ref_date, '-6m'),
            self.ref_date, '60b', 'china.sse')
        universe = Universe('zz500') + Universe('zz1000')
        codes = self.engine.fetch_codes_range(universe, dates=ref_dates)

        query = select([UniverseTable.trade_date, UniverseTable.code]).where(
            and_(UniverseTable.trade_date.in_(ref_dates),
                 or_(UniverseTable.zz500 == 1, UniverseTable.zz1000 == 1)))

        df = pd.read_sql(query, con=self.engine.engine).sort_values('code')

        for ref_date in ref_dates:
            calculated_codes = list(
                sorted(codes[codes.trade_date == ref_date].code.values))
            expected_codes = list(
                sorted(df[df.trade_date == ref_date].code.values))
            self.assertListEqual(calculated_codes, expected_codes)
コード例 #21
0
ファイル: sqlengine.py プロジェクト: zhibzeng/alpha-mind
    def fetch_risk_model_range(self,
                               universe: Universe,
                               start_date: str = None,
                               end_date: str = None,
                               dates: Iterable[str] = None,
                               risk_model: str = 'short',
                               excluded: Iterable[str] = None) -> Tuple[pd.DataFrame, pd.DataFrame]:

        risk_cov_table, special_risk_table = _map_risk_model_table(risk_model)

        cov_risk_cols = [risk_cov_table.__table__.columns[f] for f in total_risk_factors]

        cond = risk_cov_table.trade_date.in_(dates) if dates else risk_cov_table.trade_date.between(start_date,
                                                                                                    end_date)
        query = select([risk_cov_table.trade_date,
                        risk_cov_table.FactorID,
                        risk_cov_table.Factor]
                       + cov_risk_cols).where(
            cond
        )

        risk_cov = pd.read_sql(query, self.engine).sort_values(['trade_date', 'FactorID'])

        if not excluded:
            excluded = []

        risk_exposure_cols = [RiskExposure.__table__.columns[f] for f in total_risk_factors if f not in set(excluded)]

        cond = universe._query_statements(start_date, end_date, dates)

        big_table = join(RiskExposure, UniverseTable,
                         and_(
                             RiskExposure.trade_date == UniverseTable.trade_date,
                             RiskExposure.code == UniverseTable.code,
                             cond
                         )
                         )

        big_table = join(special_risk_table,
                         big_table,
                         and_(
                             RiskExposure.code == special_risk_table.code,
                             RiskExposure.trade_date == special_risk_table.trade_date,
                         ))

        query = select(
            [RiskExposure.trade_date,
             RiskExposure.code,
             special_risk_table.SRISK.label('srisk')] + risk_exposure_cols).select_from(big_table) \
            .distinct()

        risk_exp = pd.read_sql(query, self.engine).sort_values(['trade_date', 'code']).dropna()

        return risk_cov, risk_exp
コード例 #22
0
    def test_sql_engine_fetch_codes(self):
        ref_date = self.ref_date
        universe = Universe('custom', ['zz500', 'zz1000'])
        codes = self.engine.fetch_codes(ref_date, universe)

        query = select([UniverseTable.code]).where(
            and_(UniverseTable.trade_date == ref_date,
                 UniverseTable.universe.in_(['zz500', 'zz1000']))).distinct()

        df = pd.read_sql(query, con=self.engine.engine).sort_values('code')
        self.assertListEqual(codes, list(df.code.values))
コード例 #23
0
ファイル: sqlengine.py プロジェクト: dotkt/alpha-mind
    def fetch_factor_range_forward(self,
                                   universe: Universe,
                                   factors: Union[Transformer, object],
                                   start_date: str = None,
                                   end_date: str = None,
                                   dates: Iterable[str] = None):
        if isinstance(factors, Transformer):
            transformer = factors
        else:
            transformer = Transformer(factors)

        dependency = transformer.dependency
        factor_cols = _map_factors(dependency, factor_tables)

        codes = universe.query(self, start_date, end_date, dates)
        total_codes = codes.code.unique().tolist()
        total_dates = codes.trade_date.astype(str).unique().tolist()

        big_table = Market
        joined_tables = set()
        joined_tables.add(Market.__table__.name)

        for t in set(factor_cols.values()):
            if t.__table__.name not in joined_tables:
                if dates is not None:
                    big_table = outerjoin(
                        big_table, t,
                        and_(Market.trade_date == t.trade_date,
                             Market.code == t.code,
                             Market.trade_date.in_(dates)))
                else:
                    big_table = outerjoin(
                        big_table, t,
                        and_(Market.trade_date == t.trade_date,
                             Market.code == t.code,
                             Market.trade_date.between(start_date, end_date)))
                joined_tables.add(t.__table__.name)

        stats = func.lag(list(factor_cols.keys())[0],
                         -1).over(partition_by=Market.code,
                                  order_by=Market.trade_date).label('dx')

        query = select([Market.trade_date, Market.code, Market.chgPct,
                        stats]).select_from(big_table).where(
                            and_(Market.trade_date.in_(total_dates),
                                 Market.code.in_(total_codes)))

        df = pd.read_sql(query, self.engine) \
            .replace([-np.inf, np.inf], np.nan) \
            .sort_values(['trade_date', 'code'])
        return pd.merge(df, codes[['trade_date', 'code']],
                        how='inner').drop_duplicates(['trade_date', 'code'])
コード例 #24
0
    def test_sql_engine_fetch_factor_by_categories(self):
        ref_date = self.ref_date
        universe = Universe('zz500') + Universe('zz1000')
        codes = self.engine.fetch_codes(ref_date, universe)

        factor1 = {'f': CSRank('ROE', groups='sw1')}
        factor2 = {'f': CSQuantiles('ROE', groups='sw1')}
        raw_factor = 'ROE'

        df1 = self.engine.fetch_factor(ref_date, factor1, codes)
        df2 = self.engine.fetch_factor(ref_date, factor2, codes)
        df3 = self.engine.fetch_factor(ref_date, raw_factor, codes)

        ind_matrix = self.engine.fetch_industry_matrix(ref_date, codes, 'sw',
                                                       1)

        cols = sorted(ind_matrix.columns[2:].tolist())

        series = (ind_matrix[cols] *
                  np.array(range(1,
                                 len(cols) + 1))).sum(axis=1)
        df3['cat'] = series.values

        expected_rank = df3[[
            'ROE', 'cat'
        ]].groupby('cat').transform(lambda x: rankdata(x.values) - 1.)
        expected_rank[np.isnan(df3.ROE)] = np.nan
        df3['rank'] = expected_rank['ROE'].values
        np.testing.assert_array_almost_equal(df3['rank'].values,
                                             df1['f'].values)

        expected_quantile = df3[[
            'ROE', 'cat'
        ]].groupby('cat').transform(lambda x: (rankdata(x.values) - 1.) /
                                    ((len(x) - 1.) if len(x) > 1 else 1.))
        expected_quantile[np.isnan(df3.ROE)] = np.nan
        df3['quantile'] = expected_quantile['ROE'].values
        np.testing.assert_array_almost_equal(df3['quantile'].values,
                                             df2['f'].values)
コード例 #25
0
ファイル: test_sql_engine.py プロジェクト: rlcjj/alpha-mind
    def test_sql_engine_fetch_factor_range(self):
        ref_dates = makeSchedule(
            advanceDateByCalendar('china.sse', self.ref_date, '-6m'),
            self.ref_date, '60b', 'china.sse')
        universe = Universe('zz500') + Universe('zz1000')
        factor = 'ROE'

        factor_data = self.engine.fetch_factor_range(universe,
                                                     factor,
                                                     dates=ref_dates)

        codes = self.engine.fetch_codes_range(universe, dates=ref_dates)
        groups = codes.groupby('trade_date')

        for ref_date, g in groups:
            query = select([Uqer.code, Uqer.ROE]).where(
                and_(Uqer.trade_date == ref_date,
                     Uqer.code.in_(g.code.unique().tolist())))

            df = pd.read_sql(query, con=self.engine.engine)
            calculated_factor = factor_data[factor_data.trade_date == ref_date]
            np.testing.assert_array_almost_equal(calculated_factor.ROE.values,
                                                 df.ROE.values)
コード例 #26
0
    def test_universe_equal(self):
        universe1 = Universe('custom', ['zz500'])
        universe2 = Universe('custom', ['zz500'])
        self.assertEqual(universe1, universe2)

        universe1 = Universe('custom', ['zz500'])
        universe2 = Universe('custom', ['zz800'])
        self.assertNotEqual(universe1, universe2)

        filter_cond = LAST('x') > 1.
        universe1 = Universe('custom', ['zz500'], filter_cond=filter_cond)
        universe2 = Universe('custom', ['zz500'], filter_cond=filter_cond)
        self.assertEqual(universe1, universe2)

        universe1 = Universe('custom', ['zz500'], filter_cond=LAST('x') > 1.)
        universe2 = Universe('custom', ['zz500'], filter_cond=LAST('x') > 2.)
        self.assertNotEqual(universe1, universe2)
コード例 #27
0
    def test_universe_persistence(self):
        universe = Universe('custom', ['zz500'])
        univ_desc = universe.save()
        loaded_universe = Universe.load(univ_desc)

        self.assertEqual(universe.name, loaded_universe.name)
        self.assertListEqual(universe.base_universe,
                             loaded_universe.base_universe)

        universe = Universe('custom', ['zz500'], filter_cond=LAST('x') > 1.)
        univ_desc = universe.save()
        loaded_universe = Universe.load(univ_desc)

        self.assertEqual(universe.name, loaded_universe.name)
        self.assertListEqual(universe.base_universe,
                             loaded_universe.base_universe)
        self.assertEqual(str(universe.filter_cond),
                         str(loaded_universe.filter_cond))
コード例 #28
0
    def load(cls, data_desc: dict):
        freq = data_desc['freq']
        universe = Universe.load(data_desc['universe'])
        batch = data_desc['batch']
        neutralized_risk = data_desc['neutralized_risk']
        risk_model = data_desc['risk_model']
        pre_process = data_desc['pre_process']
        post_process = data_desc['post_process']
        warm_start = data_desc['warm_start']
        data_source = data_desc['data_source']

        return cls(freq=freq,
                   universe=universe,
                   batch=batch,
                   neutralized_risk=neutralized_risk,
                   risk_model=risk_model,
                   pre_process=pre_process,
                   post_process=post_process,
                   warm_start=warm_start,
                   data_source=data_source)
コード例 #29
0
ファイル: sqlengine.py プロジェクト: zhibzeng/alpha-mind
 def fetch_codes_range(self,
                       universe: Universe,
                       start_date: str = None,
                       end_date: str = None,
                       dates: Iterable[str] = None) -> pd.DataFrame:
     return universe.query(self, start_date, end_date, dates)
コード例 #30
0
ファイル: sqlengine.py プロジェクト: GehirnW/alpha-mind
    def fetch_factor_range(self,
                           universe: Universe,
                           factors: Union[Transformer, Iterable[object]],
                           start_date: str = None,
                           end_date: str = None,
                           dates: Iterable[str] = None,
                           external_data: pd.DataFrame = None,
                           used_factor_tables=None) -> pd.DataFrame:

        if isinstance(factors, Transformer):
            transformer = factors
        else:
            transformer = Transformer(factors)

        dependency = transformer.dependency

        if used_factor_tables:
            factor_cols = _map_factors(dependency, used_factor_tables)
        else:
            factor_cols = _map_factors(dependency, factor_tables)

        cond = universe.query_range(start_date, end_date, dates)

        big_table = FullFactor

        for t in set(factor_cols.values()):
            if t.__table__.name != FullFactor.__table__.name:
                if dates is not None:
                    big_table = outerjoin(
                        big_table, t,
                        and_(FullFactor.trade_date == t.trade_date,
                             FullFactor.code == t.code,
                             FullFactor.trade_date.in_(dates)))
                else:
                    big_table = outerjoin(
                        big_table, t,
                        and_(
                            FullFactor.trade_date == t.trade_date,
                            FullFactor.code == t.code,
                            FullFactor.trade_date.between(
                                start_date, end_date)))

        big_table = join(
            big_table, UniverseTable,
            and_(FullFactor.trade_date == UniverseTable.trade_date,
                 FullFactor.code == UniverseTable.code, cond))

        query = select(
            [FullFactor.trade_date, FullFactor.code, FullFactor.isOpen] + list(factor_cols.keys())) \
            .select_from(big_table).distinct()

        df = pd.read_sql(query,
                         self.engine).sort_values(['trade_date', 'code'])

        if external_data is not None:
            df = pd.merge(df, external_data, on=['trade_date',
                                                 'code']).dropna()

        df.set_index('trade_date', inplace=True)
        res = transformer.transform('code', df)

        for col in res.columns:
            if col not in set(['code', 'isOpen']) and col not in df.columns:
                df[col] = res[col].values

        df['isOpen'] = df.isOpen.astype(bool)
        return df.reset_index()