Example #1
0
    def fetch_factor_range(self,
                           universe: Universe,
                           factors: Union[Transformer, Iterable[object]],
                           start_date: str = None,
                           end_date: str = None,
                           dates: Iterable[str] = None,
                           external_data: pd.DataFrame = None,
                           used_factor_tables=None) -> pd.DataFrame:

        if isinstance(factors, Transformer):
            transformer = factors
        else:
            transformer = Transformer(factors)

        dependency = transformer.dependency

        if used_factor_tables:
            factor_cols = _map_factors(dependency, used_factor_tables)
        else:
            factor_cols = _map_factors(dependency, factor_tables)

        big_table = Market
        joined_tables = set()
        joined_tables.add(Market.__table__.name)

        for t in set(factor_cols.values()):
            if t.__table__.name not in joined_tables:
                if dates is not None:
                    big_table = outerjoin(big_table, t, and_(Market.trade_date == t.trade_date,
                                                             Market.code == t.code,
                                                             Market.trade_date.in_(dates)))
                else:
                    big_table = outerjoin(big_table, t, and_(Market.trade_date == t.trade_date,
                                                             Market.code == t.code,
                                                             Market.trade_date.between(start_date, end_date)))
                joined_tables.add(t.__table__.name)

        universe_df = universe.query(self, start_date, end_date, dates)

        query = select(
            [Market.trade_date, Market.code, Market.chgPct] + list(factor_cols.keys())) \
            .select_from(big_table).where(
                and_(
                    Market.code.in_(universe_df.code.unique().tolist()),
                    Market.trade_date.in_(dates) if dates is not None else Market.trade_date.between(start_date, end_date)
                )
        ).distinct()

        df = pd.read_sql(query, self.engine).replace([-np.inf, np.inf], np.nan)

        if external_data is not None:
            df = pd.merge(df, external_data, on=['trade_date', 'code']).dropna()

        df.sort_values(['trade_date', 'code'], inplace=True)
        df.set_index('trade_date', inplace=True)
        res = transformer.transform('code', df).replace([-np.inf, np.inf], np.nan)

        res['chgPct'] = df.chgPct
        res = res.reset_index()
        return pd.merge(res, universe_df[['trade_date', 'code']], how='inner').drop_duplicates(['trade_date', 'code'])
Example #2
0
    def fetch_factor(self,
                     ref_date: str,
                     factors: Iterable[object],
                     codes: Iterable[int],
                     warm_start: int = 0,
                     used_factor_tables=None) -> pd.DataFrame:

        if isinstance(factors, Transformer):
            transformer = factors
        else:
            transformer = Transformer(factors)

        dependency = transformer.dependency

        if used_factor_tables:
            factor_cols = _map_factors(dependency, used_factor_tables)
        else:
            factor_cols = _map_factors(dependency, factor_tables)

        start_date = advanceDateByCalendar('china.sse', ref_date,
                                           str(-warm_start) +
                                           'b').strftime('%Y-%m-%d')
        end_date = ref_date

        big_table = Market
        joined_tables = set()
        joined_tables.add(Market.__table__.name)

        for t in set(factor_cols.values()):
            if t.__table__.name not in joined_tables:
                big_table = outerjoin(
                    big_table, t,
                    and_(Market.trade_date == t.trade_date,
                         Market.code == t.code))

                joined_tables.add(t.__table__.name)

        query = select(
            [Market.trade_date, Market.code, Market.chgPct, Market.secShortName] + list(
                factor_cols.keys())) \
            .select_from(big_table).where(and_(Market.trade_date.between(start_date, end_date),
                                               Market.code.in_(codes)))

        df = pd.read_sql(query, self.engine) \
            .replace([-np.inf, np.inf], np.nan) \
            .sort_values(['trade_date', 'code']) \
            .set_index('trade_date')
        res = transformer.transform('code', df).replace([-np.inf, np.inf],
                                                        np.nan)

        res['chgPct'] = df.chgPct
        res['secShortName'] = df['secShortName']
        res = res.loc[ref_date:ref_date, :]
        res.index = list(range(len(res)))
        return res
Example #3
0
    def fetch_factor(self,
                     ref_date: str,
                     factors: Iterable[object],
                     codes: Iterable[int],
                     warm_start: int = 0,
                     used_factor_tables=None) -> pd.DataFrame:

        if isinstance(factors, Transformer):
            transformer = factors
        else:
            transformer = Transformer(factors)

        dependency = transformer.dependency

        if used_factor_tables:
            factor_cols = _map_factors(dependency, used_factor_tables)
        else:
            factor_cols = _map_factors(dependency, factor_tables)

        start_date = advanceDateByCalendar('china.sse', ref_date,
                                           str(-warm_start) +
                                           'b').strftime('%Y-%m-%d')
        end_date = ref_date

        big_table = FullFactor

        for t in set(factor_cols.values()):
            if t.__table__.name != FullFactor.__table__.name:
                big_table = outerjoin(
                    big_table, t,
                    and_(FullFactor.trade_date == t.trade_date,
                         FullFactor.code == t.code))

        query = select(
            [FullFactor.trade_date, FullFactor.code, FullFactor.isOpen] + list(factor_cols.keys())) \
            .select_from(big_table).where(and_(FullFactor.trade_date.between(start_date, end_date),
                                               FullFactor.code.in_(codes)))

        df = pd.read_sql(query,
                         self.engine).sort_values(['trade_date', 'code'
                                                   ]).set_index('trade_date')
        res = transformer.transform('code', df)

        for col in res.columns:
            if col not in set(['code', 'isOpen']) and col not in df.columns:
                df[col] = res[col].values

        df['isOpen'] = df.isOpen.astype(bool)
        df = df.loc[ref_date]
        df.index = list(range(len(df)))
        return df
Example #4
0
    def query(self,
              engine,
              start_date: str = None,
              end_date: str = None,
              dates=None) -> pd.DataFrame:

        universe_cond = self._query_statements(start_date, end_date, dates)

        if self.filter_cond is None and self.exclude_universe is None:
            # simple case
            query = select([UniverseTable.trade_date, UniverseTable.code
                            ]).where(universe_cond).distinct()
            return pd.read_sql(query, engine.engine)
        else:
            if self.filter_cond is not None:
                if isinstance(self.filter_cond, Transformer):
                    transformer = self.filter_cond
                else:
                    transformer = Transformer(self.filter_cond)

                dependency = transformer.dependency
                factor_cols = _map_factors(dependency, factor_tables)
                big_table = Market

                for t in set(factor_cols.values()):
                    if t.__table__.name != Market.__table__.name:
                        big_table = outerjoin(
                            big_table, t,
                            and_(
                                Market.trade_date == t.trade_date,
                                Market.code == t.code,
                                Market.trade_date.in_(dates)
                                if dates else Market.trade_date.between(
                                    start_date, end_date)))
                big_table = join(
                    big_table, UniverseTable,
                    and_(Market.trade_date == UniverseTable.trade_date,
                         Market.code == UniverseTable.code, universe_cond))

                query = select(
                    [Market.trade_date, Market.code] + list(factor_cols.keys())) \
                    .select_from(big_table).distinct()

                df = pd.read_sql(query, engine.engine).sort_values(
                    ['trade_date', 'code']).dropna()
                df.set_index('trade_date', inplace=True)
                filter_fields = transformer.names
                pyFinAssert(
                    len(filter_fields) == 1, ValueError,
                    "filter fields can only be 1")
                df = transformer.transform('code', df)
                df = df[df[filter_fields[0]] == 1].reset_index()[[
                    'trade_date', 'code'
                ]]
            return df
Example #5
0
    def fetch_factor_range_forward(self,
                                   universe: Universe,
                                   factors: Union[Transformer, object],
                                   start_date: str = None,
                                   end_date: str = None,
                                   dates: Iterable[str] = None):
        if isinstance(factors, Transformer):
            transformer = factors
        else:
            transformer = Transformer(factors)

        dependency = transformer.dependency
        factor_cols = _map_factors(dependency, factor_tables)

        codes = universe.query(self, start_date, end_date, dates)
        total_codes = codes.code.unique().tolist()
        total_dates = codes.trade_date.astype(str).unique().tolist()

        big_table = Market
        joined_tables = set()
        joined_tables.add(Market.__table__.name)

        for t in set(factor_cols.values()):
            if t.__table__.name not in joined_tables:
                if dates is not None:
                    big_table = outerjoin(
                        big_table, t,
                        and_(Market.trade_date == t.trade_date,
                             Market.code == t.code,
                             Market.trade_date.in_(dates)))
                else:
                    big_table = outerjoin(
                        big_table, t,
                        and_(Market.trade_date == t.trade_date,
                             Market.code == t.code,
                             Market.trade_date.between(start_date, end_date)))
                joined_tables.add(t.__table__.name)

        stats = func.lag(list(factor_cols.keys())[0],
                         -1).over(partition_by=Market.code,
                                  order_by=Market.trade_date).label('dx')

        query = select([Market.trade_date, Market.code, Market.chgPct,
                        stats]).select_from(big_table).where(
                            and_(Market.trade_date.in_(total_dates),
                                 Market.code.in_(total_codes)))

        df = pd.read_sql(query, self.engine) \
            .replace([-np.inf, np.inf], np.nan) \
            .sort_values(['trade_date', 'code'])
        return pd.merge(df, codes[['trade_date', 'code']],
                        how='inner').drop_duplicates(['trade_date', 'code'])
Example #6
0
    def fetch_factor_range(self,
                           universe: Universe,
                           factors: Union[Transformer, Iterable[object]],
                           start_date: str = None,
                           end_date: str = None,
                           dates: Iterable[str] = None,
                           external_data: pd.DataFrame = None,
                           used_factor_tables=None) -> pd.DataFrame:

        if isinstance(factors, Transformer):
            transformer = factors
        else:
            transformer = Transformer(factors)

        dependency = transformer.dependency

        if used_factor_tables:
            factor_cols = _map_factors(dependency, used_factor_tables)
        else:
            factor_cols = _map_factors(dependency, factor_tables)

        big_table = FullFactor
        joined_tables = set()
        joined_tables.add(FullFactor.__table__.name)

        for t in set(factor_cols.values()):
            if t.__table__.name not in joined_tables:
                if dates is not None:
                    big_table = outerjoin(
                        big_table, t,
                        and_(FullFactor.trade_date == t.trade_date,
                             FullFactor.code == t.code,
                             FullFactor.trade_date.in_(dates)))
                else:
                    big_table = outerjoin(
                        big_table, t,
                        and_(
                            FullFactor.trade_date == t.trade_date,
                            FullFactor.code == t.code,
                            FullFactor.trade_date.between(
                                start_date, end_date)))
                joined_tables.add(t.__table__.name)

        universe_df = universe.query(self, start_date, end_date, dates)

        query = select(
            [FullFactor.trade_date, FullFactor.code, FullFactor.isOpen] + list(factor_cols.keys())) \
            .select_from(big_table).where(
                and_(
                    FullFactor.code.in_(universe_df.code.unique().tolist()),
                    FullFactor.trade_date.in_(dates) if dates is not None else FullFactor.trade_date.between(start_date, end_date)
                )
        ).distinct()

        df = pd.read_sql(query, self.engine)
        if universe.is_filtered:
            codes = universe.query(self, start_date, end_date, dates)
            df = pd.merge(df, codes, how='inner', on=['trade_date', 'code'])

        if external_data is not None:
            df = pd.merge(df, external_data, on=['trade_date',
                                                 'code']).dropna()

        df.sort_values(['trade_date', 'code'], inplace=True)
        df.set_index('trade_date', inplace=True)
        res = transformer.transform('code', df)

        for col in res.columns:
            if col not in set(['code', 'isOpen']) and col not in df.columns:
                df[col] = res[col].values

        df['isOpen'] = df.isOpen.astype(bool)
        df = df.reset_index()
        return pd.merge(df, universe_df[['trade_date', 'code']], how='inner')