    def on_finish_entity(self, entity):
        super().on_finish_entity(entity)

        if not self.fetch_jq_timestamp:
            return

        # fill timestamp with the report's actual published date; unfilled rows still have timestamp == report_date
        the_data_list = get_data(
            data_schema=self.data_schema,
            provider=self.provider,
            entity_id=entity.id,
            order=self.data_schema.timestamp.asc(),
            return_type='domain',
            session=self.session,
            filters=[
                self.data_schema.timestamp == self.data_schema.report_date,
                self.data_schema.timestamp >= to_pd_timestamp('2005-01-01')
            ])
        if the_data_list:
            if self.data_schema == FinanceFactor:
                for the_data in the_data_list:
                    self.fill_timestamp_with_jq(entity, the_data)
            else:
                df = FinanceFactor.query_data(
                    entity_id=entity.id,
                    columns=[
                        FinanceFactor.timestamp, FinanceFactor.report_date,
                        FinanceFactor.id
                    ],
                    filters=[
                        FinanceFactor.timestamp != FinanceFactor.report_date,
                        FinanceFactor.timestamp >=
                        to_pd_timestamp('2005-01-01'),
                        FinanceFactor.report_date >=
                        the_data_list[0].report_date,
                        FinanceFactor.report_date <=
                        the_data_list[-1].report_date,
                    ])

                if pd_is_not_null(df):
                    index_df(df, index='report_date', time_field='report_date')

                for the_data in the_data_list:
                    if pd_is_not_null(df) and the_data.report_date in df.index:
                        the_data.timestamp = df.at[the_data.report_date, 'timestamp']
                        self.logger.info(
                            'db fill {} {} timestamp:{} for report_date:{}'.format(
                                self.data_schema, entity.id, the_data.timestamp,
                                the_data.report_date))
                        self.session.commit()
                    else:
                        # no timestamp saved locally; fall back to joinquant (jq)
                        self.fill_timestamp_with_jq(entity, the_data)
Example #2
    def get_referenced_saved_record(self, entity):
        return get_data(region=self.region,
                        filters=[HkHolder.holder_code == entity.code],
                        provider=self.provider,
                        data_schema=self.data_schema,
                        columns=['id', self.get_evaluated_time_field()],
                        return_type='df')
Example #3
    def load_factor(self):
        # read state
        states: List[FactorState] = FactorState.query_data(
            filters=[FactorState.factor_name == self.factor_name],
            entity_ids=self.entity_ids,
            return_type='domain')
        if states:
            for state in states:
                self.states[state.entity_id] = self.decode_state(state.state)

        if self.dry_run:
            # if we only need to compute the factor, just load acc_window rows of factor_df
            if self.accumulator is not None:
                self.factor_df = self.load_window_df(
                    provider='zvt',
                    data_schema=self.factor_schema,
                    window=self.accumulator.acc_window)
        else:
            self.factor_df = get_data(
                provider='zvt',
                data_schema=self.factor_schema,
                start_timestamp=self.start_timestamp,
                entity_ids=self.entity_ids,
                end_timestamp=self.end_timestamp,
                index=[self.category_field, self.time_field])

        col_map_object_hook = self.factor_col_map_object_hook()
        if pd_is_not_null(self.factor_df) and col_map_object_hook:
            for col in col_map_object_hook:
                if col in self.factor_df.columns:
                    self.factor_df[col] = self.factor_df[col].apply(
                        lambda x: json.loads(
                            x, object_hook=col_map_object_hook.get(col))
                        if x else None)
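The `col_map_object_hook` mapping above pairs a JSON-serialized column with a `json.loads` object_hook so stored strings are decoded back into objects. A minimal sketch of how such a hook behaves (the `MaState` class and its field are hypothetical, not zvt's actual types):

import json

class MaState:
    # hypothetical state object, standing in for whatever the factor column stores
    def __init__(self, current_count):
        self.current_count = current_count

def ma_state_hook(d: dict) -> MaState:
    # json.loads calls the hook for every decoded JSON object,
    # so the column value comes back as MaState instead of a plain dict
    return MaState(current_count=d.get("current_count"))

state = json.loads('{"current_count": 5}', object_hook=ma_state_hook)
assert state.current_count == 5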
Example #4
    def get_latest_saved_record(self, entity):
        # build the descending order clause dynamically (avoids eval)
        order = getattr(self.data_schema, self.get_evaluated_time_field()).desc()

        # for kdata the latest record may be unfinished, so fetch two records,
        # always delete the unfinished latest one and record it again
        records = get_data(region=self.region,
                           entity_id=entity.id,
                           provider=self.provider,
                           data_schema=self.data_schema,
                           order=order,
                           limit=2,
                           return_type='domain',
                           session=self.session,
                           level=self.level)

        if records:
            # delete unfinished kdata
            if len(records) == 2:
                if is_in_same_interval(t1=records[0].timestamp, t2=records[1].timestamp, level=self.level):
                    self.session.delete(records[0])
                    self.session.flush()
                    return records[1]
            return records[0]
        return None
Example #5
def get_position(region: Region,
                 trader_name=None,
                 return_type='df',
                 start_timestamp=None,
                 end_timestamp=None,
                 filters=None,
                 session=None,
                 order=None,
                 limit=None):
    if trader_name:
        if filters:
            filters = filters + [Position.trader_name == trader_name]
        else:
            filters = [Position.trader_name == trader_name]

    return get_data(region=region,
                    data_schema=Position,
                    entity_id=None,
                    codes=None,
                    level=None,
                    provider=Provider.ZVT,
                    columns=None,
                    return_type=return_type,
                    start_timestamp=start_timestamp,
                    end_timestamp=end_timestamp,
                    filters=filters,
                    session=session,
                    order=order,
                    limit=limit)
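A minimal usage sketch for `get_position`; the import paths and the `Region.CHN` member are assumptions about the local zvt layout, not confirmed by this snippet:

# hypothetical import paths
from zvt.contract import Region
from zvt.domain import Position

# positions a named trader held in January 2021, newest first
df = get_position(region=Region.CHN,
                  trader_name='my_trader',
                  start_timestamp='2021-01-01',
                  end_timestamp='2021-01-31',
                  order=Position.timestamp.desc())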
Example #6
def get_trader_info(trader_name=None,
                    return_type='df',
                    start_timestamp=None,
                    end_timestamp=None,
                    filters=None,
                    session=None,
                    order=None,
                    limit=None) -> List[trader_info.TraderInfo]:
    if trader_name:
        if filters:
            filters = filters + [
                trader_info.TraderInfo.trader_name == trader_name
            ]
        else:
            filters = [trader_info.TraderInfo.trader_name == trader_name]

    return get_data(data_schema=trader_info.TraderInfo,
                    entity_id=None,
                    codes=None,
                    level=None,
                    provider='zvt',
                    columns=None,
                    return_type=return_type,
                    start_timestamp=start_timestamp,
                    end_timestamp=end_timestamp,
                    filters=filters,
                    session=session,
                    order=order,
                    limit=limit)
Example #7
    def load_factor(self):
        if self.dry_run:
            #: if we only need to compute the factor, just load acc_window rows of factor_df
            if self.accumulator is not None:
                self.factor_df = self.load_window_df(
                    provider="zvt",
                    data_schema=self.factor_schema,
                    window=self.accumulator.acc_window)
        else:
            self.factor_df = get_data(
                provider="zvt",
                data_schema=self.factor_schema,
                start_timestamp=self.start_timestamp,
                entity_ids=self.entity_ids,
                end_timestamp=self.end_timestamp,
                index=[self.category_field, self.time_field],
            )

        col_map_object_hook = self.factor_col_map_object_hook()
        if pd_is_not_null(self.factor_df) and col_map_object_hook:
            for col in col_map_object_hook:
                if col in self.factor_df.columns:
                    self.factor_df[col] = self.factor_df[col].apply(
                        lambda x: json.loads(
                            x, object_hook=col_map_object_hook.get(col))
                        if x else None)
Example #8
    def record(self, entity, start, end, size, timestamps):
        try:
            industry_stocks = get_industry_stocks(entity.code, date=now_pd_timestamp())
        except Exception:
            # not an industry block; try it as a concept block instead
            industry_stocks = get_concept_stocks(entity.code, date=now_pd_timestamp())
        if len(industry_stocks) == 0:
            return None
        df = pd.DataFrame({"stock": industry_stocks})
        df["stock_id"] = df.stock.apply(lambda x: to_entity_id(x, "stock"))
        df["stock_code"] = df.stock_id.str.split("_", expand=True)[2]
        df["stock_name"] = df.stock_id.apply(lambda x:get_data(data_schema=Stock, entity_id=x, provider='joinquant').name)
        df["block_type"] = entity.block_type
        df["code"] = entity.code
        df["name"] = entity.name
        df["exchange"] = entity.exchange
        df["timestamp"] = now_pd_timestamp()
        df["entity_id"] = entity.id
        df["entity_type"] = "block"
        df["id"] = df.apply(lambda x: x.entity_id + "_" + x.stock_id, axis=1)
        if df.empty:
            return None
        df_to_db(data_schema=self.data_schema, df=df, provider=self.provider,
                 force_update=True)

        self.logger.info('finish recording BlockStock:{},{}'.format(entity.category, entity.name))
Example #9
    def get_referenced_saved_record(self, entity):
        return get_data(region=self.region,
                        entity_id=entity.id,
                        provider=self.provider,
                        data_schema=self.data_schema,
                        columns=['id', self.get_evaluated_time_field()],
                        return_type='df')
Example #10
    def get_latest_saved_record(self, entity):
        # build the descending order clause dynamically (avoids eval)
        order = getattr(self.data_schema, self.get_evaluated_time_field()).desc()

        #: for kdata the latest record may be unfinished, so fetch two records
        #: and keep only the newest one within the same interval
        records = get_data(
            entity_id=entity.id,
            provider=self.provider,
            data_schema=self.data_schema,
            order=order,
            limit=2,
            return_type="domain",
            session=self.session,
            level=self.level,
        )
        if records:
            #: delete unfinished kdata
            if len(records) == 2:
                if is_in_same_interval(t1=records[0].timestamp,
                                       t2=records[1].timestamp,
                                       level=self.level):
                    self.session.delete(records[1])
                    self.session.flush()
            return records[0]
        return None
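The dedup above hinges on `is_in_same_interval`: two timestamps count as duplicates when they fall inside the same candle for the given level. A rough daily-level sketch of that check (not zvt's actual implementation):

import pandas as pd

def same_interval_sketch(t1, t2) -> bool:
    # daily-level idea: two timestamps share a 1d candle iff they share the date;
    # the real check generalizes this to the recorder's IntervalLevel
    return pd.Timestamp(t1).floor('D') == pd.Timestamp(t2).floor('D')

assert same_interval_sketch('2021-01-04 10:00', '2021-01-04 14:30')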
Example #11
    def generate_domain(self, entity, original_data):
        """
        generate the data_schema instance from entity and original_data; the original_data comes from the record result

        :param entity:
        :param original_data:
        """

        got_new_data = False

        #: if the domain is directly generated in record method, we just return it
        if isinstance(original_data, self.data_schema):
            got_new_data = True
            return got_new_data, original_data

        the_id = self.generate_domain_id(entity, original_data)

        #: optional way
        #: item = self.session.query(self.data_schema).get(the_id)

        items = get_data(
            data_schema=self.data_schema,
            session=self.session,
            provider=self.provider,
            entity_id=entity.id,
            filters=[self.data_schema.id == the_id],
            return_type="domain",
        )

        if items and not self.force_update:
            self.logger.info("ignore the data {}:{} saved before".format(
                self.data_schema, the_id))
            return got_new_data, None

        if not items:
            timestamp_str = original_data[self.get_original_time_field()]
            timestamp = None
            try:
                timestamp = to_pd_timestamp(timestamp_str)
            except Exception as e:
                self.logger.exception(e)

            if "name" in get_schema_columns(self.data_schema):
                domain_item = self.data_schema(id=the_id,
                                               code=entity.code,
                                               name=entity.name,
                                               entity_id=entity.id,
                                               timestamp=timestamp)
            else:
                domain_item = self.data_schema(id=the_id,
                                               code=entity.code,
                                               entity_id=entity.id,
                                               timestamp=timestamp)
            got_new_data = True
        else:
            domain_item = items[0]

        fill_domain_from_dict(domain_item, original_data, self.get_data_map())
        return got_new_data, domain_item
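The dedup query in `generate_domain` only works if `generate_domain_id` is deterministic: the same entity and the same original record must always map to the same id. A sketch of a typical scheme (not necessarily the library's exact format):

def generate_domain_id_sketch(entity, original_data, time_field='timestamp'):
    # same entity + same timestamp -> same id, so a re-run finds the
    # previously saved row via the id filter instead of inserting a duplicate
    return '{}_{}'.format(entity.id, original_data[time_field])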
Example #12
def get_account_stats(trader_name=None, return_type='df', start_timestamp=None, end_timestamp=None,
                      filters=None, session=None, order=None, limit=None):
    if trader_name:
        if filters:
            filters = filters + [AccountStats.trader_name == trader_name]
        else:
            filters = [AccountStats.trader_name == trader_name]

    return get_data(data_schema=AccountStats, entity_id=None, codes=None, level=None, provider='zvt',
                    columns=None, return_type=return_type, start_timestamp=start_timestamp,
                    end_timestamp=end_timestamp, filters=filters, session=session, order=order, limit=limit)
Example #13
    def get_latest_saved_record(self, entity):
        # build the descending order clause dynamically (avoids eval)
        order = getattr(self.data_schema, self.get_evaluated_time_field()).desc()

        records = get_data(entity_id=entity.id,
                           provider=self.provider,
                           data_schema=self.data_schema,
                           order=order,
                           limit=1,
                           return_type='domain',
                           session=self.session)
        if records:
            return records[0]
        return None
Example #14
    def get_latest_saved_record(self, entity):
        # build the descending order clause dynamically (avoids eval)
        order = getattr(self.data_schema, self.get_evaluated_time_field()).desc()

        records = get_data(region=self.region,
                           filters=[HkHolder.holder_code == entity.code],
                           provider=self.provider,
                           data_schema=self.data_schema,
                           order=order,
                           limit=1,
                           return_type='domain',
                           session=self.session)
        if records:
            return records[0]
        return None
Example #15
    def evaluate_start_end_size_timestamps(self, entity):
        # get latest record
        latest_record = get_data(entity_id=entity.id,
                                 provider=self.provider,
                                 data_schema=self.data_schema,
                                 order=self.data_schema.timestamp.desc(), limit=1,
                                 return_type='domain',
                                 session=self.session)
        if latest_record:
            remote_record = self.get_remote_latest_record(entity)
            if not remote_record or (
                    latest_record[0].id == remote_record.id):
                # already up to date
                return None, None, 0, None
            else:
                # small catch-up batch
                return None, None, 10, None

        # no local data yet: full backfill
        return None, None, 1000, None
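The tuple returned above is `(start, end, size, timestamps)`, where `size` tells the recorder how many records to request. A sketch of how a driver loop might consume it (the `recorder` object and the `record` call signature here are assumptions):

def run_recorder_step(recorder, entity):
    # hypothetical driver: ask the recorder how much to fetch, then fetch it
    start, end, size, timestamps = recorder.evaluate_start_end_size_timestamps(entity)
    if size == 0:
        return  # already up to date; skip this entity
    recorder.record(entity, start=start, end=end, size=size, timestamps=timestamps)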
Example #16
    def __init__(self,
                 data_schema: Type[Mixin],
                 entity_schema: Type[TradableEntity] = None,
                 provider: str = None,
                 entity_provider: str = None,
                 entity_ids: List[str] = None,
                 exchanges: List[str] = None,
                 codes: List[str] = None,
                 the_timestamp: Union[str, pd.Timestamp] = None,
                 start_timestamp: Union[str, pd.Timestamp] = None,
                 end_timestamp: Union[str, pd.Timestamp] = None,
                 columns: List = None,
                 filters: List = None,
                 order: object = None,
                 limit: int = None,
                 level: Union[str, IntervalLevel] = None,
                 category_field: str = 'entity_id',
                 time_field: str = 'timestamp',
                 computing_window: int = None,
                 # child added arguments
                 keep_all_timestamp: bool = False,
                 fill_method: str = 'ffill',
                 effective_number: int = None,
                 transformer: Transformer = None,
                 accumulator: Accumulator = None,
                 need_persist: bool = False,
                 dry_run: bool = False,
                 factor_name: str = None,
                 clear_state: bool = False,
                 not_load_data: bool = False) -> None:
        """

        :param computing_window: the window size for computing factor
        :param keep_all_timestamp: whether to fill all timestamp gaps, default False
        :param fill_method:
        :param effective_number:
        :param transformer:
        :param accumulator:
        :param need_persist: whether persist factor
        :param dry_run: True for just computing factor, False for backtesting
        """

        self.not_load_data = not_load_data

        super().__init__(data_schema, entity_schema, provider, entity_provider, entity_ids, exchanges, codes,
                         the_timestamp, start_timestamp, end_timestamp, columns, filters, order, limit, level,
                         category_field, time_field, computing_window)

        # define a unique name for your factor if you want to keep factor state;
        # the factor state is keyed by factor_name and entity_id
        if not factor_name:
            self.factor_name = type(self).__name__.lower()
        else:
            self.factor_name = factor_name

        self.clear_state = clear_state

        self.keep_all_timestamp = keep_all_timestamp
        self.fill_method = fill_method
        self.effective_number = effective_number

        if transformer:
            self.transformer = transformer
        else:
            self.transformer = self.__class__.transformer

        if accumulator:
            self.accumulator = accumulator
        else:
            self.accumulator = self.__class__.accumulator

        self.need_persist = need_persist
        self.dry_run = dry_run

        # intermediate result, not persisted
        # data_df -> pipe_df
        self.pipe_df: pd.DataFrame = None

        # computed factor result, persistable; derived from pipe_df
        # pipe_df -> factor_df
        self.factor_df: pd.DataFrame = None

        # result_df is the standard df used for stock selection, derived from factor_df
        # factor_df -> result_df
        self.result_df: pd.DataFrame = None

        # entity_id:state
        self.states: dict = {}

        if self.clear_state:
            self.clear_state_data()
        elif self.need_persist:
            self.load_factor()

            # trim data_df according to the already computed factor_df and computing_window:
            # data_df is only read to compute factor_df, and selection/backtesting
            # depend only on factor_df, so with a persisted factor_df we keep just
            # the data_df rows still needed for computation
            if pd_is_not_null(self.data_df) and self.computing_window:
                dfs = []
                for entity_id, df in self.data_df.groupby(level=0):
                    latest_saved = get_data(provider='zvt',
                                            data_schema=self.factor_schema,
                                            entity_id=entity_id,
                                            order=self.factor_schema.timestamp.desc(),
                                            limit=1,
                                            index=[self.category_field, self.time_field],
                                            return_type='domain')
                    if latest_saved:
                        df1 = df[df.timestamp < latest_saved[0].timestamp].iloc[-self.computing_window:]
                        if pd_is_not_null(df1):
                            df = df[df.timestamp >= df1.iloc[0].timestamp]
                    dfs.append(df)

                self.data_df = pd.concat(dfs)

        self.register_data_listener(self)

        # when data loading is skipped, compute() is not triggered by a load,
        # so call it directly for the case: 1) load factor from db 2) compute the result
        if self.not_load_data:
            self.compute()
Example #17
    def __init__(
        self,
        data_schema: Type[Mixin],
        entity_schema: Type[TradableEntity] = None,
        provider: str = None,
        entity_provider: str = None,
        entity_ids: List[str] = None,
        exchanges: List[str] = None,
        codes: List[str] = None,
        start_timestamp: Union[str, pd.Timestamp] = None,
        end_timestamp: Union[str, pd.Timestamp] = None,
        columns: List = None,
        filters: List = None,
        order: object = None,
        limit: int = None,
        level: Union[str, IntervalLevel] = IntervalLevel.LEVEL_1DAY,
        category_field: str = "entity_id",
        time_field: str = "timestamp",
        computing_window: int = None,
        keep_all_timestamp: bool = False,
        fill_method: str = "ffill",
        effective_number: int = None,
        transformer: Transformer = None,
        accumulator: Accumulator = None,
        need_persist: bool = False,
        only_compute_factor: bool = False,
        factor_name: str = None,
        clear_state: bool = False,
        only_load_factor: bool = False,
    ) -> None:
        """
        :param keep_all_timestamp:
        :param fill_method:
        :param effective_number:
        :param transformer:
        :param accumulator:
        :param need_persist: whether persist factor
        :param only_compute_factor: only compute the factor, not the result
        :param factor_name:
        :param clear_state:
        :param only_load_factor: only load factor and compute result
        """
        self.only_load_factor = only_load_factor

        #: define a unique name for your factor if you want to keep factor state;
        #: the factor state is keyed by factor_name and entity_id
        if not factor_name:
            self.name = to_snake_str(type(self).__name__)
        else:
            self.name = factor_name

        DataReader.__init__(
            self,
            data_schema,
            entity_schema,
            provider,
            entity_provider,
            entity_ids,
            exchanges,
            codes,
            start_timestamp,
            end_timestamp,
            columns,
            filters,
            order,
            limit,
            level,
            category_field,
            time_field,
            computing_window,
        )

        EntityStateService.__init__(self, entity_ids=entity_ids)

        self.clear_state = clear_state

        self.keep_all_timestamp = keep_all_timestamp
        self.fill_method = fill_method
        self.effective_number = effective_number

        if transformer:
            self.transformer = transformer
        else:
            self.transformer = self.__class__.transformer

        if accumulator:
            self.accumulator = accumulator
        else:
            self.accumulator = self.__class__.accumulator

        self.need_persist = need_persist
        self.dry_run = only_compute_factor

        #: intermediate result, not persisted
        #: data_df -> pipe_df
        self.pipe_df: pd.DataFrame = None

        #: computed factor result, persistable; derived from pipe_df
        #: pipe_df -> factor_df
        self.factor_df: pd.DataFrame = None

        #: result_df is the standard df used for stock selection, derived from factor_df
        #: factor_df -> result_df
        self.result_df: pd.DataFrame = None

        if self.clear_state:
            self.clear_state_data()
        elif self.need_persist or self.only_load_factor:
            self.load_factor()

            #: trim data_df according to the already computed factor_df and computing_window:
            #: data_df is only read to compute factor_df, and selection/backtesting
            #: depend only on factor_df, so with a persisted factor_df we keep just
            #: the data_df rows still needed for computation
            if pd_is_not_null(self.data_df) and self.computing_window:
                dfs = []
                for entity_id, df in self.data_df.groupby(level=0):
                    latest_saved = get_data(
                        provider="zvt",
                        data_schema=self.factor_schema,
                        entity_id=entity_id,
                        order=self.factor_schema.timestamp.desc(),
                        limit=1,
                        index=[self.category_field, self.time_field],
                        return_type="domain",
                    )
                    if latest_saved:
                        df1 = df[df.timestamp < latest_saved[0].timestamp].iloc[-self.computing_window:]
                        if pd_is_not_null(df1):
                            df = df[df.timestamp >= df1.iloc[0].timestamp]
                    dfs.append(df)

                self.data_df = pd.concat(dfs)

        self.register_data_listener(self)

        #: when only loading the factor, compute() is not triggered by a data load,
        #: so call it directly for the case: 1) load factor from db 2) compute the result
        if self.only_load_factor:
            self.compute()
Example #18
    def record2(self, entity, start, end, size, timestamps):
        from datetime import timedelta

        if not end:
            end = to_time_str(now_pd_timestamp())
        # cap each request at a 30-day window
        if (pd.to_datetime(end) - start).days >= 30:
            end = to_time_str(start + timedelta(days=30))
        start = to_time_str(start)
        if start == end:
            return None
        # HK stocks are not handled for now
        if 'hk' in entity.id:
            return None
        exchange = 'SH' if 'sh' in entity.id else 'SZ'
        em_code = entity.code + '.' + exchange
        columns_list = {
            'TOTALSHARE': 'capitalization',  # total share capital
            'LIQSHARE': 'circulating_cap',  # circulating shares
            'MV': 'market_cap',  # total market cap
            'LIQMV': 'circulating_market_cap',  # circulating market cap
            'TURN': 'turnover_ratio',  # turnover ratio
            'PELYR': 'pe',  # static PE
            'PETTM': 'pe_ttm',  # trailing (TTM) PE
            'PBLYR': 'pb',  # PB (latest annual report)
            'PBMRQ': 'pb_mrq',  # PB (MRQ)
            'PSTTM': 'ps_ttm',  # PS (TTM)
            'PCFTTM': 'pcf_ttm',  # PCF (latest annual report, operating cash flow)
        }
        # df = c.csd(em_code, [i for i in columns_list.keys()], start,end,"ispandas=1,DelType=2")
        df = get_data(data_schema=StockValuation, entity_id=entity.id, provider='joinquant', start_timestamp=start,
                      end_timestamp=end)
        if df.empty:
            df = get_data(data_schema=StockValuation, entity_id=entity.id, provider='joinquant', limit=1)
            start = df.timestamp[0]
            end = to_time_str(start + timedelta(days=30))
            df = get_data(data_schema=StockValuation, entity_id=entity.id, provider='joinquant', start_timestamp=start,
                          end_timestamp=end)
        if df.empty:
            return None
        df.rename(columns={
            "ps": "ps_ttm",
            "pcf": "pcf_ttm",
        }, inplace=True)
        trade_day = StockTradeDay.query_data(order=StockTradeDay.timestamp.desc(), start_timestamp=start,
                                             end_timestamp=end)
        df_capital_all = pd.DataFrame()
        for tradeday in trade_day.timestamp:
            df_capital = c.css(em_code, "WACC,DIVIDENDYIELDNEW",
                               f"TradeDate={to_time_str(tradeday)},FrIndex=1,MrIndex=1,ispandas=1")
            try:
                df_capital['DATES'] = tradeday
            except Exception:
                # css returned nothing usable for this trade day; skip it
                continue
            # DataFrame.append was removed in pandas 2.0; use concat instead
            df_capital_all = pd.concat([df_capital_all, df_capital])
            # 'DIVIDENDYIELDNEW': 'div_yield'  # dividend yield

        try:
            if df.empty:
                return None
        except Exception:
            self.logger.info(f'stock valuation from the choice data source is not ready yet, fetch failed. '
                             f'code:{em_code} start:{start} end:{end}')
            return None
        df['CODES'] = df_capital_all.index[0]
        df['DATES'] = df['timestamp']
        df_capital_all['DATES'] = pd.to_datetime(df_capital_all['DATES'])

        df_capital_all.rename(columns={"DIVIDENDYIELDNEW": "div_yield", "WACC": "wacc"}, inplace=True)
        df = pd.merge(df, df_capital_all, on=['CODES', 'DATES'], how='outer')
        df.dropna(subset=['id'], inplace=True)
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update)

        return None
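`record2` caps each request at a 30-day window before hitting the provider; a quick, self-contained check of the window arithmetic used above:

import pandas as pd
from datetime import timedelta

start = pd.Timestamp('2021-01-01')
end = '2021-03-01'
# requested span is 59 days, so the cap applies
assert (pd.to_datetime(end) - start).days >= 30
# the request is clipped to start + 30 days
assert start + timedelta(days=30) == pd.Timestamp('2021-01-31')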
Example #19
    def __init__(
            self,
            data_schema: Type[Mixin],
            entity_schema: Type[EntityMixin] = None,
            provider: str = None,
            entity_provider: str = None,
            entity_ids: List[str] = None,
            exchanges: List[str] = None,
            codes: List[str] = None,
            the_timestamp: Union[str, pd.Timestamp] = None,
            start_timestamp: Union[str, pd.Timestamp] = None,
            end_timestamp: Union[str, pd.Timestamp] = None,
            columns: List = None,
            filters: List = None,
            order: object = None,
            limit: int = None,
            level: Union[str, IntervalLevel] = None,
            category_field: str = 'entity_id',
            time_field: str = 'timestamp',
            computing_window: int = None,
            # child added arguments
            keep_all_timestamp: bool = False,
            fill_method: str = 'ffill',
            effective_number: int = None,
            transformer: Transformer = None,
            accumulator: Accumulator = None,
            need_persist: bool = False,
            dry_run: bool = False) -> None:
        """

        :param computing_window: the window size for computing factor
        :param keep_all_timestamp: whether to fill all timestamp gaps, default False
        :param fill_method:
        :param effective_number:
        :param transformer:
        :param accumulator:
        :param need_persist: whether persist factor
        :param dry_run: True for just computing factor, False for backtesting
        """

        super().__init__(data_schema, entity_schema, provider, entity_provider,
                         entity_ids, exchanges, codes, the_timestamp,
                         start_timestamp, end_timestamp, columns, filters,
                         order, limit, level, category_field, time_field,
                         computing_window)

        self.factor_name = type(self).__name__.lower()

        self.keep_all_timestamp = keep_all_timestamp
        self.fill_method = fill_method
        self.effective_number = effective_number
        self.transformer = transformer
        self.accumulator = accumulator

        self.need_persist = need_persist
        self.dry_run = dry_run

        # intermediate result, not persisted
        # data_df -> pipe_df
        self.pipe_df: pd.DataFrame = None

        # computed factor result, persistable; derived from pipe_df
        # pipe_df -> factor_df
        self.factor_df: pd.DataFrame = None

        # result_df is the standard df used for stock selection, derived from factor_df
        # factor_df -> result_df
        self.result_df: pd.DataFrame = None

        # the factor-persistence feature is not mature yet and may change later
        if self.need_persist:
            if self.dry_run:
                # if we only need to compute the factor, just load acc_window rows of factor_df
                if self.accumulator is not None:
                    self.factor_df = self.load_window_df(
                        provider='zvt',
                        data_schema=self.factor_schema,
                        window=self.accumulator.acc_window)
            else:
                self.factor_df = get_data(
                    provider='zvt',
                    data_schema=self.factor_schema,
                    start_timestamp=self.start_timestamp,
                    end_timestamp=self.end_timestamp,
                    index=[self.category_field, self.time_field])

            # trim data_df according to the already computed factor_df and computing_window:
            # data_df is only read to compute factor_df, and selection/backtesting
            # depend only on factor_df, so with a persisted factor_df we keep just
            # the data_df rows still needed for computation
            if pd_is_not_null(self.data_df) and self.computing_window:
                dfs = []
                for entity_id, df in self.data_df.groupby(level=0):
                    latest_saved = get_data(
                        provider='zvt',
                        data_schema=self.factor_schema,
                        entity_id=entity_id,
                        order=self.factor_schema.timestamp.desc(),
                        limit=1,
                        index=[self.category_field, self.time_field],
                        return_type='domain')
                    if latest_saved:
                        df1 = df[df.timestamp < latest_saved[0].timestamp].iloc[-self.computing_window:]
                        if pd_is_not_null(df1):
                            df = df[df.timestamp >= df1.iloc[0].timestamp]
                    dfs.append(df)

                self.data_df = pd.concat(dfs)

        self.register_data_listener(self)