예제 #1
0
    def fetch_trade_status_range(self,
                                 universe: Universe,
                                 start_date: str = None,
                                 end_date: str = None,
                                 dates: Iterable[str] = None,
                                 offset=0):
        """Load the ``is_open`` flag, led by ``offset`` rows, for universe members.

        The universe membership is resolved first via ``universe.query``. When
        ``dates`` is given (and truthy) its first and last entries replace
        ``start_date`` / ``end_date``. The end date is then moved with
        ``advanceDateByCalendar('china.sse', end_date, '<offset>b')`` before the
        market table is queried.

        Returns a DataFrame with ``trade_date``, ``code`` and ``is_open``,
        inner-merged with the universe membership on ``trade_date`` and
        ``code``.
        """
        members = universe.query(self, start_date, end_date, dates)

        # Explicit dates take precedence over the start/end bounds.
        if dates:
            start_date, end_date = dates[0], dates[-1]

        # Extend the upper bound so the lead() window can look past the
        # originally requested end date.
        shifted_end = advanceDateByCalendar('china.sse', end_date,
                                            str(offset) + 'b')
        end_date = shifted_end.strftime('%Y-%m-%d')

        # is_open is Market.isOpen taken `offset` rows ahead within each code,
        # ordered by trade date.
        lead_open = func.lead(Market.isOpen, offset)
        is_open = lead_open.over(partition_by=Market.code,
                                 order_by=Market.trade_date).label('is_open')

        wanted_codes = members.code.unique().tolist()
        in_scope = and_(Market.trade_date.between(start_date, end_date),
                        Market.code.in_(wanted_codes))

        cte = select([Market.trade_date, Market.code,
                      is_open]).where(in_scope).cte('cte')
        ordered = select([cte]).select_from(cte).order_by(
            cte.columns['trade_date'], cte.columns['code'])

        status = pd.read_sql(ordered, self.engine)

        # Keep only the (trade_date, code) pairs that belong to the universe.
        return pd.merge(status,
                        members[['trade_date', 'code']],
                        on=['trade_date', 'code'])
예제 #2
0
 def fetch_codes(self, ref_date: str, universe: Universe) -> List[int]:
     """Return the sorted, de-duplicated codes selected by ``universe`` for ``ref_date``.

     ``universe.query(ref_date)`` supplies the WHERE condition applied to the
     distinct ``(trade_date, code)`` rows of ``UniverseTable``.
     """
     membership_filter = universe.query(ref_date)

     stmt = select([UniverseTable.trade_date, UniverseTable.code])
     stmt = stmt.distinct().where(membership_filter)

     rows = self.engine.execute(stmt).fetchall()

     # Column 1 of every row is the code; a set removes codes repeated
     # across different trade dates.
     unique_codes = set(row[1] for row in rows)
     return sorted(unique_codes)
예제 #3
0
    def fetch_industry_range(self,
                             universe: Universe,
                             start_date: str = None,
                             end_date: str = None,
                             dates: Iterable[str] = None,
                             category: str = 'sw',
                             level: int = 1):
        """Load industry classification rows for universe members.

        ``category`` is translated by ``_map_industry_category`` into the value
        matched against ``Industry.industry``; ``level`` selects the
        ``industryID<level>`` / ``industryName<level>`` column pair, returned
        as ``industry_code`` and ``industry``.

        Rows containing NaN are dropped. If ``universe.is_filtered`` is true the
        result is additionally inner-merged with ``universe.query(...)`` and
        sorted by ``trade_date`` and ``code``. The returned DataFrame holds at
        most one row per ``(trade_date, code)``.
        """
        industry_category_name = _map_industry_category(category)
        membership_filter = universe._query_statements(start_date, end_date,
                                                       dates)

        # Industry rows restricted to the chosen category and to the
        # universe membership, matched on date and code.
        join_condition = and_(
            Industry.trade_date == UniverseTable.trade_date,
            Industry.code == UniverseTable.code,
            Industry.industry == industry_category_name,
            membership_filter)
        big_table = join(Industry, UniverseTable, join_condition)

        id_column = getattr(Industry, 'industryID{}'.format(level))
        name_column = getattr(Industry, 'industryName{}'.format(level))

        query = select([
            Industry.trade_date,
            Industry.code,
            id_column.label('industry_code'),
            name_column.label('industry'),
        ]).select_from(big_table).distinct()

        result = pd.read_sql(query, self.engine).dropna()

        if universe.is_filtered:
            # The SQL condition alone is not treated as sufficient for a
            # filtered universe: intersect with the resolved membership.
            keys = ['trade_date', 'code']
            members = universe.query(self, start_date, end_date, dates)
            result = pd.merge(result, members, how='inner', on=keys)
            result = result.sort_values(keys)

        return result.drop_duplicates(['trade_date', 'code'])
예제 #4
0
    def fetch_factor_range(self,
                           universe: Universe,
                           factors: Union[Transformer, Iterable[object]],
                           start_date: str = None,
                           end_date: str = None,
                           dates: Iterable[str] = None,
                           external_data: pd.DataFrame = None,
                           used_factor_tables=None) -> pd.DataFrame:
        """Load and transform factor values for universe members.

        ``factors`` is used directly when it is a ``Transformer``; otherwise it
        is wrapped in one. The transformer's dependencies are mapped to table
        columns through ``_map_factors``, using ``used_factor_tables`` when it
        is truthy and the module-level ``factor_tables`` otherwise.

        Rows are restricted to ``dates`` when it is not ``None``, else to the
        ``start_date`` .. ``end_date`` range. Infinite values are replaced by
        NaN both before and after the transformation. When ``external_data``
        is given it is merged in on ``trade_date`` / ``code`` and rows with
        NaN are dropped.

        Returns the transformed frame plus ``chgPct``, inner-merged with the
        universe membership and de-duplicated on ``(trade_date, code)``.
        """
        transformer = factors if isinstance(factors, Transformer) else Transformer(factors)
        dependency = transformer.dependency

        lookup_tables = used_factor_tables if used_factor_tables else factor_tables
        factor_cols = _map_factors(dependency, lookup_tables)

        def date_filter():
            # Build a fresh expression on every call so each join / where
            # clause owns its own condition object.
            if dates is not None:
                return Market.trade_date.in_(dates)
            return Market.trade_date.between(start_date, end_date)

        # Left-join every distinct factor table onto Market exactly once.
        big_table = Market
        joined_tables = {Market.__table__.name}
        for t in set(factor_cols.values()):
            table_name = t.__table__.name
            if table_name in joined_tables:
                continue
            on_clause = and_(Market.trade_date == t.trade_date,
                             Market.code == t.code,
                             date_filter())
            big_table = outerjoin(big_table, t, on_clause)
            joined_tables.add(table_name)

        universe_df = universe.query(self, start_date, end_date, dates)

        selected = [Market.trade_date, Market.code, Market.chgPct] + list(factor_cols.keys())
        row_filter = and_(
            Market.code.in_(universe_df.code.unique().tolist()),
            date_filter())
        query = select(selected).select_from(big_table).where(row_filter).distinct()

        raw = pd.read_sql(query, self.engine)
        df = raw.replace([-np.inf, np.inf], np.nan)

        if external_data is not None:
            df = pd.merge(df, external_data, on=['trade_date', 'code']).dropna()

        # The transformer is called with 'code' as the grouping key on a frame
        # sorted by date and code and indexed by trade_date.
        df = df.sort_values(['trade_date', 'code']).set_index('trade_date')
        res = transformer.transform('code', df).replace([-np.inf, np.inf], np.nan)

        res['chgPct'] = df.chgPct
        res = res.reset_index()

        merged = pd.merge(res, universe_df[['trade_date', 'code']], how='inner')
        return merged.drop_duplicates(['trade_date', 'code'])
예제 #5
0
    def fetch_risk_model_range(
            self,
            universe: Universe,
            start_date: str = None,
            end_date: str = None,
            dates: Iterable[str] = None,
            risk_model: str = 'short',
            excluded: Iterable[str] = None
    ) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Load the factor covariance and the factor exposures of a risk model.

        ``risk_model`` is resolved by ``_map_risk_model_table`` into the
        covariance table and the specific-risk column.

        Args:
            universe: universe whose members' exposures are loaded.
            start_date, end_date: inclusive range, used when ``dates`` is falsy.
            dates: explicit trade dates; takes precedence when truthy.
            risk_model: key understood by ``_map_risk_model_table``.
            excluded: names from ``total_risk_factors`` to leave out of the
                exposure columns. Any iterable is accepted, including
                one-shot iterators.

        Returns:
            ``(risk_cov, risk_exp)`` where ``risk_cov`` holds ``trade_date``,
            ``FactorID``, ``Factor`` and one column per entry of
            ``total_risk_factors`` (sorted by ``trade_date``, ``FactorID``),
            and ``risk_exp`` holds ``trade_date``, ``code``, the specific-risk
            column and the non-excluded factor exposure columns.
        """
        risk_cov_table, special_risk_col = _map_risk_model_table(risk_model)

        cov_risk_cols = [
            risk_cov_table.__table__.columns[f] for f in total_risk_factors
        ]

        # NOTE(review): `dates` is tested by truthiness, so an empty sequence
        # falls back to the start/end range.
        if dates:
            cond = risk_cov_table.trade_date.in_(dates)
        else:
            cond = risk_cov_table.trade_date.between(start_date, end_date)

        query = select([
            risk_cov_table.trade_date, risk_cov_table.FactorID,
            risk_cov_table.Factor
        ] + cov_risk_cols).where(cond)

        risk_cov = pd.read_sql(query, self.engine).sort_values(
            ['trade_date', 'FactorID'])

        # Materialise the exclusion set once. Building it inside the
        # comprehension repeated the work for every factor and, for a one-shot
        # iterator, left it empty after the first factor was checked.
        excluded_names = set(excluded) if excluded else set()

        risk_exposure_cols = [
            FullFactor.__table__.columns[f] for f in total_risk_factors
            if f not in excluded_names
        ]

        cond = universe._query_statements(start_date, end_date, dates)

        big_table = join(
            FullFactor, UniverseTable,
            and_(FullFactor.trade_date == UniverseTable.trade_date,
                 FullFactor.code == UniverseTable.code, cond))

        query = select(
            [FullFactor.trade_date, FullFactor.code, special_risk_col] + risk_exposure_cols).select_from(big_table) \
            .distinct()

        risk_exp = pd.read_sql(query, self.engine)

        if universe.is_filtered:
            # For a filtered universe, intersect with the resolved membership.
            codes = universe.query(self, start_date, end_date, dates)
            risk_exp = pd.merge(risk_exp,
                                codes,
                                how='inner',
                                on=['trade_date', 'code'
                                    ]).sort_values(['trade_date', 'code'])

        return risk_cov, risk_exp
예제 #6
0
    def fetch_factor_range_forward(self,
                                   universe: Universe,
                                   factors: Union[Transformer, object],
                                   start_date: str = None,
                                   end_date: str = None,
                                   dates: Iterable[str] = None):
        """Load the first mapped factor column shifted forward per code as ``dx``.

        ``factors`` is wrapped in a ``Transformer`` unless it already is one;
        only the transformer's dependencies are used, mapped to columns via
        ``_map_factors`` and the module-level ``factor_tables``. The first
        mapped column is passed through ``lag(..., -1)`` partitioned by code
        and ordered by trade date.

        Returns a DataFrame with ``trade_date``, ``code``, ``chgPct`` and
        ``dx`` (infinities replaced by NaN), inner-merged with the universe
        membership and de-duplicated on ``(trade_date, code)``.
        """
        transformer = factors if isinstance(factors, Transformer) else Transformer(factors)
        dependency = transformer.dependency
        factor_cols = _map_factors(dependency, factor_tables)

        codes = universe.query(self, start_date, end_date, dates)
        total_codes = codes.code.unique().tolist()
        total_dates = codes.trade_date.astype(str).unique().tolist()

        def join_date_filter():
            # A fresh expression per join, limited to the requested dates.
            if dates is not None:
                return Market.trade_date.in_(dates)
            return Market.trade_date.between(start_date, end_date)

        # Left-join every distinct factor table onto Market exactly once.
        big_table = Market
        joined_tables = {Market.__table__.name}
        for t in set(factor_cols.values()):
            table_name = t.__table__.name
            if table_name in joined_tables:
                continue
            on_clause = and_(Market.trade_date == t.trade_date,
                             Market.code == t.code,
                             join_date_filter())
            big_table = outerjoin(big_table, t, on_clause)
            joined_tables.add(table_name)

        # Only the first mapped factor column feeds the window function.
        first_factor = list(factor_cols.keys())[0]
        shifted = func.lag(first_factor, -1)
        stats = shifted.over(partition_by=Market.code,
                             order_by=Market.trade_date).label('dx')

        # Rows are limited to dates and codes present in the universe.
        row_filter = and_(Market.trade_date.in_(total_dates),
                          Market.code.in_(total_codes))
        columns = [Market.trade_date, Market.code, Market.chgPct, stats]
        query = select(columns).select_from(big_table).where(row_filter)

        raw = pd.read_sql(query, self.engine)
        df = raw.replace([-np.inf, np.inf], np.nan)
        df = df.sort_values(['trade_date', 'code'])

        merged = pd.merge(df, codes[['trade_date', 'code']], how='inner')
        return merged.drop_duplicates(['trade_date', 'code'])
예제 #7
0
 def fetch_codes_range(self,
                       universe: Universe,
                       start_date: str = None,
                       end_date: str = None,
                       dates: Iterable[str] = None) -> pd.DataFrame:
     """Return the universe membership for a date range or explicit dates.

     Delegates to ``universe.query``, passing this object along with
     ``start_date``, ``end_date`` and ``dates`` unchanged.
     """
     membership = universe.query(self, start_date, end_date, dates)
     return membership
예제 #8
0
 def fetch_codes(self, ref_date: str, universe: Universe) -> List[int]:
     """Return the codes of ``universe`` on ``ref_date`` in ascending order.

     The universe is queried with ``ref_date`` as both start and end date.
     """
     membership = universe.query(self, ref_date, ref_date)
     codes = membership.code.tolist()
     # tolist() returns a new list, so sorting it in place touches nothing else.
     codes.sort()
     return codes
예제 #9
0
    def fetch_factor_range(self,
                           universe: Universe,
                           factors: Union[Transformer, Iterable[object]],
                           start_date: str = None,
                           end_date: str = None,
                           dates: Iterable[str] = None,
                           external_data: pd.DataFrame = None,
                           used_factor_tables=None) -> pd.DataFrame:
        """Load factor values for universe members and append transformed columns.

        ``factors`` is used directly when it is a ``Transformer``; otherwise it
        is wrapped in one. The transformer's dependencies are mapped to table
        columns through ``_map_factors``, using ``used_factor_tables`` when it
        is truthy and the module-level ``factor_tables`` otherwise.

        Args:
            universe: universe whose members are loaded.
            factors: a ``Transformer`` or the expressions to build one from.
            start_date, end_date: inclusive range, used when ``dates`` is None.
            dates: explicit trade dates; takes precedence when not None.
            external_data: optional frame merged in on ``trade_date`` /
                ``code``; rows with NaN are dropped after that merge.
            used_factor_tables: optional replacement for ``factor_tables``.

        Returns:
            A DataFrame with ``trade_date``, ``code``, a boolean ``isOpen``,
            the raw columns that were loaded and every transformer output
            column not already present, inner-merged with the universe
            membership.
        """
        if isinstance(factors, Transformer):
            transformer = factors
        else:
            transformer = Transformer(factors)

        dependency = transformer.dependency

        if used_factor_tables:
            factor_cols = _map_factors(dependency, used_factor_tables)
        else:
            factor_cols = _map_factors(dependency, factor_tables)

        def date_filter():
            # A fresh expression per use, limited to the requested dates.
            if dates is not None:
                return FullFactor.trade_date.in_(dates)
            return FullFactor.trade_date.between(start_date, end_date)

        # Left-join every distinct factor table onto FullFactor exactly once.
        big_table = FullFactor
        joined_tables = {FullFactor.__table__.name}

        for t in set(factor_cols.values()):
            if t.__table__.name not in joined_tables:
                big_table = outerjoin(
                    big_table, t,
                    and_(FullFactor.trade_date == t.trade_date,
                         FullFactor.code == t.code,
                         date_filter()))
                joined_tables.add(t.__table__.name)

        universe_df = universe.query(self, start_date, end_date, dates)

        query = select(
            [FullFactor.trade_date, FullFactor.code, FullFactor.isOpen] + list(factor_cols.keys())) \
            .select_from(big_table).where(
                and_(
                    FullFactor.code.in_(universe_df.code.unique().tolist()),
                    date_filter()
                )
        ).distinct()

        df = pd.read_sql(query, self.engine)
        if universe.is_filtered:
            # Reuse the membership already fetched above; querying the
            # universe again with identical arguments only costs another
            # database round-trip.
            df = pd.merge(df, universe_df, how='inner',
                          on=['trade_date', 'code'])

        if external_data is not None:
            df = pd.merge(df, external_data, on=['trade_date',
                                                 'code']).dropna()

        df.sort_values(['trade_date', 'code'], inplace=True)
        df.set_index('trade_date', inplace=True)
        res = transformer.transform('code', df)

        # Copy over transformer outputs that are not already columns of df.
        # Values are assigned positionally (.values), not by index alignment.
        reserved = {'code', 'isOpen'}
        for col in res.columns:
            if col not in reserved and col not in df.columns:
                df[col] = res[col].values

        df['isOpen'] = df.isOpen.astype(bool)
        df = df.reset_index()
        return pd.merge(df, universe_df[['trade_date', 'code']], how='inner')