Example #1
    def generate_targets(self):
        if pd_is_not_null(self.filter_result) and pd_is_not_null(
                self.score_result):
            # for long
            result1 = self.filter_result[self.filter_result.score]
            result2 = self.score_result[
                self.score_result.score >= self.long_threshold]
            long_result = result2.loc[result1.index, :]
            # for short
            result1 = self.filter_result[~self.filter_result.score]
            result2 = self.score_result[
                self.score_result.score <= self.short_threshold]
            short_result = result2.loc[result1.index, :]
        elif pd_is_not_null(self.score_result):
            long_result = self.score_result[
                self.score_result.score >= self.long_threshold]
            short_result = self.score_result[
                self.score_result.score <= self.short_threshold]
        else:
            long_result = self.filter_result[self.filter_result.score]
            short_result = self.filter_result[~self.filter_result.score]

        # filter in blocks
        if self.portfolio_selector:
            if pd_is_not_null(self.portfolio_selector.open_long_df):
                long_result = long_result[lambda df: self.in_block(
                    long_result, target_type=TargetType.open_long)]

            if pd_is_not_null(self.portfolio_selector.open_short_df):
                short_result = short_result[lambda df: self.in_block(
                    short_result, target_type=TargetType.open_short)]

        self.open_long_df = self.normalize_result_df(long_result)
        self.open_short_df = self.normalize_result_df(short_result)
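Note: every example on this page guards DataFrame access with pd_is_not_null. A minimal sketch of such a helper, assuming it only needs to reject None and empty frames (the actual zvt implementation may differ):

import pandas as pd

def pd_is_not_null(df) -> bool:
    # True only for a pandas object that exists and holds at least one row
    return df is not None and isinstance(df, (pd.DataFrame, pd.Series)) and not df.empty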
Example #2
def get_trading_signals_figure(order_reader: OrderReader, entity_id: str,
                               provider: str, level):
    entity_type, _, _ = decode_entity_id(entity_id)
    security_factor = TechnicalFactor(entity_type=entity_type,
                                      entity_ids=[entity_id],
                                      level=level,
                                      provider=provider)

    if pd_is_not_null(security_factor.data_df):
        print(security_factor.data_df.tail())

    # generate the annotation df
    order_reader.move_on(timeout=0)
    df = order_reader.data_df.copy()
    if pd_is_not_null(df):
        df['value'] = df['order_price']
        df['flag'] = df['order_type'].apply(order_type_flag)
        df['color'] = df['order_type'].apply(order_type_color)
    print(df.tail())

    data, layout = security_factor.draw(render=None,
                                        figures=go.Candlestick,
                                        annotation_df=df)

    return go.Figure(data=data, layout=layout)
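Note: order_type_flag and order_type_color are assumed to map an order type to a buy/sell marker and an annotation color. A hypothetical sketch (the order-type names are invented here, not zvt's actual helpers):

def order_type_flag(order_type: str) -> str:
    # hypothetical: buy-side orders get 'B', sell-side orders get 'S'
    return 'B' if order_type in ('order_long', 'order_close_short') else 'S'

def order_type_color(order_type: str) -> str:
    # hypothetical: red for buys, green for sells (A-share convention)
    return '#ec0000' if order_type in ('order_long', 'order_close_short') else '#00da3c'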
Example #3
    def do_compute(self):
        # stateless transform operation
        if pd_is_not_null(self.data_df) and self.transformer:
            self.pipe_df = self.transformer.transform(self.data_df)

        # stateful accumulation operation
        if pd_is_not_null(self.pipe_df) and self.accumulator:
            self.factor_df = self.accumulator.acc(self.pipe_df, self.factor_df)
        else:
            self.factor_df = self.pipe_df
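Note: do_compute assumes a Transformer exposing transform(input_df) and an Accumulator exposing acc(input_df, acc_df). A minimal hypothetical Transformer, consistent with the ma columns used in Example #25 but not zvt's actual base class:

import pandas as pd

class MaTransformer:
    def __init__(self, windows=(5, 10)):
        self.windows = windows

    def transform(self, input_df: pd.DataFrame) -> pd.DataFrame:
        # stateless: derive moving-average columns per entity from the kdata
        df = input_df.copy()
        for window in self.windows:
            df['ma{}'.format(window)] = df.groupby(level=0)['close'].transform(
                lambda s: s.rolling(window).mean())
        return df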
Example #4
    def record(self, entity, start, end, size, timestamps):
        # only fetch forward-adjusted (qfq) data
        if not self.end_timestamp:
            df = get_bars(to_jq_entity_id(entity),
                          count=size,
                          unit=self.jq_trading_level,
                          fields=['date', 'open', 'close', 'low', 'high', 'volume', 'money'],
                          fq_ref_date=to_time_str(now_pd_timestamp()),
                          include_now=True)
        else:
            end_timestamp = to_time_str(self.end_timestamp)
            df = get_bars(to_jq_entity_id(entity),
                          count=size,
                          unit=self.jq_trading_level,
                          fields=['date', 'open', 'close', 'low', 'high', 'volume', 'money'],
                          end_dt=end_timestamp,
                          fq_ref_date=to_time_str(now_pd_timestamp()),
                          include_now=False)

        if pd_is_not_null(df):
            df['name'] = entity.name
            df.rename(columns={'money': 'turnover', 'date': 'timestamp'}, inplace=True)

            df['entity_id'] = entity.id
            df['timestamp'] = pd.to_datetime(df['timestamp'])
            df['provider'] = 'joinquant'
            df['level'] = self.level.value
            df['code'] = entity.code

            # check whether previously saved qfq data needs to be recalculated
            check_df = df.head(1)
            check_date = check_df['timestamp'][0]
            current_df = get_kdata(entity_id=entity.id, provider=self.provider, start_timestamp=check_date,
                                   end_timestamp=check_date, limit=1, level=self.level)
            if pd_is_not_null(current_df):
                old = current_df.iloc[0, :]['close']
                new = check_df['close'][0]
                # a different close at the same timestamp means the qfq data must be recalculated
                if round(old, 2) != round(new, 2):
                    self.factor = new / old
                    self.last_timestamp = pd.Timestamp(check_date)

            def generate_kdata_id(se):
                if self.level >= IntervalLevel.LEVEL_1DAY:
                    return "{}_{}".format(se['entity_id'], to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))
                else:
                    return "{}_{}".format(se['entity_id'], to_time_str(se['timestamp'], fmt=TIME_FORMAT_ISO8601))

            df['id'] = df[['entity_id', 'timestamp']].apply(generate_kdata_id, axis=1)

            df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update)

        return None
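Note: record only stores the new adjustment factor and its timestamp; rescaling the previously persisted bars happens elsewhere. A sketch of what that rescaling step could look like, assuming the usual OHLC column names (not zvt's actual code):

import pandas as pd

def reapply_qfq_factor(kdata: pd.DataFrame, factor: float) -> pd.DataFrame:
    # multiply previously saved prices by the new forward-adjustment factor
    adjusted = kdata.copy()
    for col in ('open', 'close', 'low', 'high'):
        adjusted[col] = adjusted[col] * factor
    return adjusted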
Example #5
    def register_data_listener(self, listener):
        if listener not in self.data_listeners:
            self.data_listeners.append(listener)

        # notify it once after registration
        if pd_is_not_null(self.data_df):
            listener.on_data_loaded(self.data_df)
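Note: the listener callbacks used across these examples (on_data_loaded here, on_entity_data_changed and on_data_changed in Example #29) suggest an interface roughly like the following; the exact signatures are assumed:

import pandas as pd

class DataListener(object):
    def on_data_loaded(self, data: pd.DataFrame) -> None:
        # called once when the initial data_df is ready
        pass

    def on_entity_data_changed(self, entity, added_data: pd.DataFrame) -> None:
        # called when new rows arrive for a single entity
        pass

    def on_data_changed(self, data: pd.DataFrame) -> None:
        # called after the whole data_df has been updated
        pass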
Example #6
    def on_finish_entity(self, entity):
        super().on_finish_entity(entity)

        if not self.fetch_jq_timestamp:
            return

        # fill the timestamp for report published date
        the_data_list = get_data(
            data_schema=self.data_schema,
            provider=self.provider,
            entity_id=entity.id,
            order=self.data_schema.timestamp.asc(),
            return_type='domain',
            session=self.session,
            filters=[
                self.data_schema.timestamp == self.data_schema.report_date,
                self.data_schema.timestamp >= to_pd_timestamp('2005-01-01')
            ])
        if the_data_list:
            if self.data_schema == FinanceFactor:
                for the_data in the_data_list:
                    self.fill_timestamp_with_jq(entity, the_data)
            else:
                df = get_finance_factor(
                    entity_id=entity.id,
                    columns=[
                        FinanceFactor.timestamp, FinanceFactor.report_date,
                        FinanceFactor.id
                    ],
                    filters=[
                        FinanceFactor.timestamp != FinanceFactor.report_date,
                        FinanceFactor.timestamp >=
                        to_pd_timestamp('2005-01-01'),
                        FinanceFactor.report_date >=
                        the_data_list[0].report_date,
                        FinanceFactor.report_date <=
                        the_data_list[-1].report_date,
                    ])

                if pd_is_not_null(df):
                    index_df(df, index='report_date', time_field='report_date')

                for the_data in the_data_list:
                    if (df is not None) and (
                            not df.empty) and the_data.report_date in df.index:
                        the_data.timestamp = df.at[the_data.report_date,
                                                   'timestamp']
                        self.logger.info(
                            'db fill {} {} timestamp:{} for report_date:{}'.
                            format(self.data_schema, entity.id,
                                   the_data.timestamp, the_data.report_date))
                        self.session.commit()
                    else:
                        # self.logger.info(
                        #     'waiting jq fill {} {} timestamp:{} for report_date:{}'.format(self.data_schema,
                        #                                                                    security_item.id,
                        #                                                                    the_data.timestamp,
                        #                                                                    the_data.report_date))

                        self.fill_timestamp_with_jq(entity, the_data)
Example #7
    def record(self, entity, start, end, size, timestamps):
        q = query(finance.FUND_PORTFOLIO_STOCK).filter(finance.FUND_PORTFOLIO_STOCK.pub_date >= start).filter(
            finance.FUND_PORTFOLIO_STOCK.code == entity.code)
        df = finance.run_query(q)
        if pd_is_not_null(df):
            #          id    code period_start  period_end    pub_date  report_type_id report_type  rank  symbol  name      shares    market_cap  proportion
            # 0   8640569  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     1  601318  中国平安  19869239.0  1.361043e+09        7.09
            # 1   8640570  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     2  600519  贵州茅台    921670.0  6.728191e+08        3.50
            # 2   8640571  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     3  600036  招商银行  18918815.0  5.806184e+08        3.02
            # 3   8640572  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     4  601166  兴业银行  22862332.0  3.646542e+08        1.90
            df['timestamp'] = pd.to_datetime(df['pub_date'])

            df.rename(columns={'symbol': 'stock_code', 'name': 'stock_name'}, inplace=True)
            df['proportion'] = df['proportion'] * 0.01

            df = portfolio_relate_stock(df, entity)

            df['stock_id'] = df['stock_code'].apply(lambda x: china_stock_code_to_id(x))
            df['id'] = df[['entity_id', 'stock_id', 'pub_date', 'id']].apply(lambda x: '_'.join(x.astype(str)), axis=1)
            df['report_date'] = pd.to_datetime(df['period_end'])
            df['report_period'] = df['report_type'].apply(lambda x: jq_to_report_period(x))

            df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update)

            # self.logger.info(df.tail())
            self.logger.info(f"persist etf {entity.code} portfolio success")

        return None
Example #8
    def draw(self,
             render='html',
             file_name=None,
             width=None,
             height=None,
             title=None,
             keep_ui_state=True,
             annotation_df=None,
             target_type: TargetType = TargetType.open_long):

        if target_type == TargetType.open_long:
            df = self.open_long_df
        elif target_type == TargetType.open_short:
            df = self.open_short_df
        else:
            return

        if pd_is_not_null(df):
            df = df.copy()
            df['target_type'] = target_type.value
            drawer = Drawer(NormalData(df=df))

            drawer.draw_table(render=render,
                              file_name=file_name,
                              width=width,
                              height=height,
                              title=title,
                              keep_ui_state=keep_ui_state)
Example #9
    def on_finish(self):
        last_year = str(now_pd_timestamp().year)
        codes = [item.code for item in self.entities]
        need_filleds = get_dividend_financing(
            provider=self.provider,
            codes=codes,
            return_type='domain',
            session=self.session,
            filters=[DividendFinancing.rights_raising_fund.is_(None)],
            end_timestamp=last_year)

        for item in need_filleds:
            df = get_rights_issue_detail(
                provider=self.provider,
                entity_id=item.entity_id,
                columns=[
                    RightsIssueDetail.timestamp,
                    RightsIssueDetail.rights_raising_fund
                ],
                start_timestamp=item.timestamp,
                end_timestamp="{}-12-31".format(item.timestamp.year))
            if pd_is_not_null(df):
                item.rights_raising_fund = df['rights_raising_fund'].sum()
                self.session.commit()

        super().on_finish()
Example #10
def get_traders() -> List[str]:
    df = get_group(provider='zvt',
                   data_schema=SimAccount,
                   column=SimAccount.trader_name,
                   group_func=None)
    if pd_is_not_null(df):
        return df['trader_name'].tolist()
    return []
Example #11
def to_annotations(annotation_df: pd.DataFrame):
    """
    annotation_df format:
                                    value    flag    color
    entity_id    timestamp


    :param annotation_df:
    :type annotation_df:
    :return:
    :rtype:
    """
    annotations = []

    if pd_is_not_null(annotation_df):
        for trace_name, df in annotation_df.groupby(level=0):
            if pd_is_not_null(df):
                for (_, timestamp), item in df.iterrows():
                    if 'color' in item:
                        color = item['color']
                    else:
                        color = '#ec0000'

                    value = round(item['value'], 2)
                    annotations.append(
                        dict(
                            x=timestamp,
                            y=value,
                            xref='x',
                            yref='y',
                            text=item['flag'],
                            showarrow=True,
                            align='center',
                            arrowhead=2,
                            arrowsize=1,
                            arrowwidth=2,
                            # arrowcolor='#030813',
                            ax=-10,
                            ay=-30,
                            bordercolor='#c7c7c7',
                            borderwidth=1,
                            bgcolor=color,
                            opacity=0.8))

    return annotations
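Note: the returned list plugs straight into a plotly layout. A usage sketch, assuming data and annotation_df come from a draw call like the one in Example #2:

import plotly.graph_objs as go

layout = go.Layout(annotations=to_annotations(annotation_df))
figure = go.Figure(data=data, layout=layout)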
Example #12
    def acc(self, input_df, acc_df) -> pd.DataFrame:
        if pd_is_not_null(acc_df):
            input_df = input_df[~input_df['id'].isin(acc_df['id'])]

        input_df = input_df.copy()

        for entity_id, df in input_df.groupby(level=0):
            pre_index = None
            pre_item = None
            current_state = 0
            pre_state = 0

            for index, item in df.iterrows():
                if pre_item is not None:
                    current_state = get_current_state(item, pre_item,
                                                      current_state)

                input_df.loc[index, 'tmp_bi_state'] = current_state

                if (current_state != 0
                        and pre_state != 0) and current_state != pre_state:
                    # -1 -> 1
                    if current_state == 1:
                        input_df.loc[pre_index, 'tmp_di'] = True
                    # 1 -> -1
                    if current_state == -1:
                        input_df.loc[pre_index, 'tmp_ding'] = True

                pre_index = index
                pre_item = item
                pre_state = current_state

            print(input_df)
            self.logger.info('finish calculating :{}'.format(entity_id))

        if pd_is_not_null(acc_df):
            if pd_is_not_null(input_df):
                df = input_df[list(set(acc_df.columns) & set(input_df.columns))]
                acc_df = acc_df.append(df)
                acc_df = acc_df.sort_index(level=[0, 1])
        else:
            acc_df = input_df

        return acc_df
Example #13
    def run(self):
        if self.filter_factors:
            musts = []
            for factor in self.filter_factors:
                df = factor.get_result_df()

                if not pd_is_not_null(df):
                    raise Exception('no data for factor:{},{}'.format(
                        factor.factor_name, factor))

                if len(df.columns) > 1:
                    s = df.all(axis="columns")
                    s.name = 'score'
                    musts.append(s.to_frame(name='score'))
                else:
                    df.columns = ['score']
                    musts.append(df)

            self.filter_result = list(accumulate(musts,
                                                 func=operator.__and__))[-1]

        if self.score_factors:
            scores = []
            for factor in self.score_factors:
                df = factor.get_result_df()
                if not pd_is_not_null(df):
                    raise Exception('no data for factor:{},{}'.format(
                        factor.factor_name, factor))

                if len(df.columns) > 1:
                    s = df.agg("mean", axis="columns")
                    s.name = 'score'
                    scores.append(s.to_frame(name='score'))
                else:
                    df.columns = ['score']
                    scores.append(df)
            self.score_result = list(accumulate(scores,
                                                func=operator.__add__))[-1]

        self.generate_targets()
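Note: list(accumulate(..., func))[-1] is a running reduce kept to its last element; it ANDs the filter frames (or sums the score frames) element-wise. A small worked example of the filter branch:

import operator
from itertools import accumulate
import pandas as pd

m1 = pd.DataFrame({'score': [True, True, False]})
m2 = pd.DataFrame({'score': [True, False, False]})
filter_result = list(accumulate([m1, m2], func=operator.__and__))[-1]
# filter_result['score'] -> [True, False, False]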
Example #14
File: api.py Project: godsonhyl/zvt
def get_entity_ids(entity_type='stock',
                   exchanges=['sz', 'sh'],
                   codes=None,
                   provider='eastmoney'):
    df = get_entities(entity_type=entity_type,
                      exchanges=exchanges,
                      codes=codes,
                      provider=provider)
    if pd_is_not_null(df):
        return df['entity_id'].to_list()
    return None
Example #15
    def get_targets(self, timestamp, target_type: TargetType = TargetType.open_long) -> List[str]:
        if target_type == TargetType.open_long:
            df = self.open_long_df
        elif target_type == TargetType.open_short:
            df = self.open_short_df
        else:
            return []

        if pd_is_not_null(df):
            if timestamp in df.index:
                target_df = df.loc[[to_pd_timestamp(timestamp)], :]
                return target_df['entity_id'].tolist()
        return []
Example #16
def get_entity_ids(entity_type='stock',
                   entity_schema: EntityMixin = None,
                   exchanges=None,
                   codes=None,
                   provider=None):
    df = get_entities(entity_type=entity_type,
                      entity_schema=entity_schema,
                      exchanges=exchanges,
                      codes=codes,
                      provider=provider)
    if pd_is_not_null(df):
        return df['entity_id'].to_list()
    return None
Example #17
    def load_window_df(self, provider, data_schema, window):
        window_df = None

        dfs = []
        for entity_id in self.entity_ids:
            df = data_schema.query_data(provider=provider,
                                        index=[self.category_field, self.time_field],
                                        order=data_schema.timestamp.desc(),
                                        entity_id=entity_id,
                                        limit=window)
            if pd_is_not_null(df):
                dfs.append(df)
        if dfs:
            window_df = pd.concat(dfs)
            window_df = window_df.sort_index(level=[0, 1])
        return window_df
Example #18
File: api.py Project: godsonhyl/zvt
def init_entities(df, entity_type='stock', provider='exchange'):
    df = df.drop_duplicates(subset=['id'])
    security_schema = get_entity_schema(entity_type)
    store_category = get_db_name(data_schema=security_schema)

    db_engine = get_db_engine(provider, db_name=store_category)

    current = get_entities(entity_type=entity_type,
                           columns=[security_schema.id, security_schema.code],
                           provider=provider)

    if pd_is_not_null(current):
        df = df[~df['id'].isin(current['id'])]

    df.to_sql(security_schema.__tablename__,
              db_engine,
              index=False,
              if_exists='append')
Example #19
    def normalize(self):
        """
        normalize data_df to
                                    col1    col2    col3
        entity_id    index_field

        """
        if pd_is_not_null(self.data_df):
            if not is_normal_df(self.data_df):
                self.data_df = normal_index_df(self.data_df)

            self.entity_ids = self.data_df.index.levels[0].to_list()

            for entity_id in self.entity_ids:
                df = self.data_df.loc[(entity_id, )]
                self.df_list.append(df)
                self.entity_map_df[entity_id] = df

            if len(self.df_list) > 1 and self.fill_index:
                self.df_list = fill_with_same_index(df_list=self.df_list)
Example #20
    def load_window_df(self, provider, data_schema):
        window_df = None
        if not self.entity_ids:
            self.entity_ids = get_entity_ids(provider='eastmoney', entity_type=self.entity_type,
                                             exchanges=self.exchanges,
                                             codes=self.codes)
        dfs = []
        for entity_id in self.entity_ids:
            df = get_data(provider=provider,
                          data_schema=data_schema,
                          start_timestamp=self.start_timestamp,
                          index=[self.category_field, self.time_field],
                          order=data_schema.timestamp.desc(),
                          entity_id=entity_id,
                          limit=self.computing_window)
            if pd_is_not_null(df):
                dfs.append(df)
        if dfs:
            window_df = pd.concat(dfs)
            window_df = window_df.sort_index(level=[0, 1])
        return window_df
Example #21
def risky_company(the_date=to_pd_timestamp(now_time_str()), income_yoy=-0.1, profit_yoy=-0.1, entity_ids=None):
    codes = []
    start_timestamp = to_pd_timestamp(the_date) - datetime.timedelta(130)
    # falling revenue, falling profit, low current ratio, low quick ratio
    finance_filter = or_(FinanceFactor.op_income_growth_yoy < income_yoy,
                         FinanceFactor.net_profit_growth_yoy <= profit_yoy,
                         FinanceFactor.current_ratio < 0.7,
                         FinanceFactor.quick_ratio < 0.5)
    df = FinanceFactor.query_data(entity_ids=entity_ids, start_timestamp=start_timestamp, filters=[finance_filter],
                                  columns=['code'])
    if pd_is_not_null(df):
        codes = codes + df.code.tolist()

    # high receivables, high inventories, high goodwill
    balance_filter = (BalanceSheet.accounts_receivable + BalanceSheet.inventories + BalanceSheet.goodwill) \
                     > BalanceSheet.total_equity / 2
    df = BalanceSheet.query_data(entity_ids=entity_ids, start_timestamp=start_timestamp, filters=[balance_filter],
                                 columns=['code'])
    if pd_is_not_null(df):
        codes = codes + df.code.tolist()

    # receivables > 1/2 of net profit
    df1 = BalanceSheet.query_data(entity_ids=entity_ids, start_timestamp=start_timestamp,
                                  columns=[BalanceSheet.code, BalanceSheet.accounts_receivable])
    if pd_is_not_null(df1):
        df1.drop_duplicates(subset='code', keep='last', inplace=True)
        df1 = df1.set_index('code', drop=True).sort_index()

    df2 = IncomeStatement.query_data(entity_ids=entity_ids, start_timestamp=start_timestamp,
                                     columns=[IncomeStatement.code,
                                              IncomeStatement.net_profit])
    if pd_is_not_null(df2):
        df2.drop_duplicates(subset='code', keep='last', inplace=True)
        df2 = df2.set_index('code', drop=True).sort_index()

    if pd_is_not_null(df1) and pd_is_not_null(df2):
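        # df1 and df2 are both indexed by 'code', so the comparison below aligns receivables and net profit per company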
        codes = codes + df1[df1.accounts_receivable > df2.net_profit / 2].index.tolist()

    return list(set(codes))
Example #22
    def __init__(
            self,
            data_schema: object,
            entity_ids: List[str] = None,
            entity_type: str = 'stock',
            exchanges: List[str] = ['sh', 'sz'],
            codes: List[str] = None,
            the_timestamp: Union[str, pd.Timestamp] = None,
            start_timestamp: Union[str, pd.Timestamp] = None,
            end_timestamp: Union[str, pd.Timestamp] = None,
            columns: List = None,
            filters: List = None,
            order: object = None,
            limit: int = None,
            provider: str = 'eastmoney',
            level: Union[str, IntervalLevel] = IntervalLevel.LEVEL_1DAY,
            category_field: str = 'entity_id',
            time_field: str = 'timestamp',
            computing_window: int = 250,
            # child added arguments
            keep_all_timestamp: bool = False,
            fill_method: str = 'ffill',
            effective_number: int = 10,
            transformer: Transformer = None,
            accumulator: Accumulator = None,
            need_persist: bool = True,
            dry_run: bool = False) -> None:

        super().__init__(data_schema, entity_ids, entity_type, exchanges,
                         codes, the_timestamp, start_timestamp, end_timestamp,
                         columns, filters, order, limit, provider, level,
                         category_field, time_field, computing_window)

        self.factor_name = type(self).__name__.lower()

        self.keep_all_timestamp = keep_all_timestamp
        self.fill_method = fill_method
        self.effective_number = effective_number
        self.transformer = transformer
        self.accumulator = accumulator

        self.need_persist = need_persist
        self.dry_run = dry_run

        # result of the factor computation; persistable
        self.factor_df: pd.DataFrame = None
        # intermediate result; not persisted
        self.pipe_df: pd.DataFrame = None
        # result_df is the standard df used for target selection
        self.result_df: pd.DataFrame = None

        # accumulator-style computations need the previous factor_df, e.g. some market-wide statistics
        if self.need_persist:
            # if only computing the factor, just load the factor_df within the valid window
            if self.dry_run:
                self.factor_df = self.load_window_df(
                    provider='zvt', data_schema=self.factor_schema)
            else:
                self.factor_df = get_data(
                    provider='zvt',
                    data_schema=self.factor_schema,
                    start_timestamp=self.start_timestamp,
                    index=[self.category_field, self.time_field])

        if pd_is_not_null(self.factor_df):
            dfs = []
            for entity_id, df in self.data_df.groupby(level=0):
                if entity_id in self.factor_df.index.levels[0]:
                    df = df[df.timestamp >= self.factor_df.loc[(
                        entity_id, )].index[0]]
                dfs.append(df)

            self.data_df = pd.concat(dfs)

        self.register_data_listener(self)
Example #23
    def do_compute(self):
        super().do_compute()

        if pd_is_not_null(self.pipe_df) and self.scorer:
            self.result_df = self.scorer.score(self.pipe_df)
Example #24
    def pre_compute(self):
        if not pd_is_not_null(self.pipe_df):
            self.pipe_df = self.data_df
Example #25
    def acc(self, input_df, acc_df) -> pd.DataFrame:
        short_ma_col = 'ma{}'.format(self.short_window)
        long_ma_col = 'ma{}'.format(self.long_window)

        input_df['score'] = input_df[short_ma_col] > input_df[long_ma_col]

        # filter out timestamps that have already been computed
        if pd_is_not_null(acc_df):
            dfs = []
            for entity_id, df in input_df.groupby(level=0):
                if entity_id in acc_df.index.levels[0]:
                    df = df[df.timestamp > acc_df.loc[(entity_id, )].index[-1]]
                dfs.append(df)

            input_df = pd.concat(dfs, sort=False)

        for entity_id, df in input_df.groupby(level=0):
            count = 0
            pct = 1
            current_state = None
            pre_index = None
            check_acc = False
            for index, item in df['score'].iteritems():
                # 5-day MA above the 10-day MA
                if item:
                    state = 'up'
                # 5-day MA below the 10-day MA
                elif not pd.isna(df[short_ma_col][index]) and not pd.isna(
                        df[long_ma_col][index]):
                    state = 'down'
                else:
                    continue

                # count how many bars the current state ('up'/'down') has persisted
                if current_state == state:
                    if count > 0:
                        count = count + 1
                    else:
                        count = count - 1
                    if pct == 0:
                        pct = df['change_pct'][index]
                    else:
                        pct = (1 + pct) * (1 + df['change_pct'][index]) - 1

                else:
                    # state switched: record the total for the previous state
                    if count != 0:
                        input_df.loc[pre_index, self.total_col] = count
                    current_state = state

                    if current_state == 'up':
                        count = 1
                    else:
                        count = -1
                    pct = 0

                    # incremental computation: carry over the previous results
                    if pd_is_not_null(acc_df) and not check_acc:
                        if entity_id in acc_df.index.levels[0]:
                            acc_col_current = acc_df.loc[(
                                entity_id, )].iloc[-1][self.current_col]
                            if not pd.isna(acc_col_current):
                                # up
                                if acc_col_current > 0 and (current_state
                                                            == 'up'):
                                    count = acc_col_current + 1
                                # down
                                elif acc_col_current < 0 and (current_state
                                                              == 'down'):
                                    count = acc_col_current - 1
                                # state has changed
                                else:
                                    pre_timestamp = acc_df.loc[(entity_id, ),
                                                               'timestamp'][-1]
                                    acc_df.loc[
                                        (entity_id, pre_timestamp),
                                        self.total_col] = acc_col_current
                        check_acc = True

                # set the current state
                input_df.loc[index, self.current_col] = count
                input_df.loc[index, 'current_pct'] = pct

                pre_index = index

            self.logger.info('finish calculating :{}'.format(entity_id))

        if pd_is_not_null(acc_df):
            if pd_is_not_null(input_df):
                df = input_df[list(set(acc_df.columns) & set(input_df.columns))]
                acc_df = acc_df.append(df, sort=False)
                acc_df = acc_df.sort_index(level=[0, 1])
        else:
            acc_df = input_df

        return acc_df
Example #26
    def empty(self):
        return not pd_is_not_null(self.data_df)
Example #27
    def record(self, entity, start, end, size, timestamps):
        if not end:
            end = now_pd_timestamp()

        date_range = pd.date_range(start=start, end=end, freq='1D').tolist()
        for date in date_range:
            # constituent stocks of the ETF and their weights
            etf_stock_df = get_etf_stocks(code=entity.code,
                                          timestamp=date,
                                          provider=self.provider)

            if pd_is_not_null(etf_stock_df):
                all_pct = etf_stock_df['proportion'].sum()

                if all_pct >= 1.2 or all_pct <= 0.8:
                    self.logger.error(
                        f'ignore etf:{entity.id}  date:{date} proportion sum:{all_pct}'
                    )
                    break

                etf_stock_df.set_index('stock_id', inplace=True)

                # valuation data for the constituent stocks
                stock_valuation_df = StockValuation.query_data(
                    entity_ids=etf_stock_df.index.to_list(),
                    filters=[StockValuation.timestamp == date],
                    index='entity_id')

                if pd_is_not_null(stock_valuation_df):
                    stock_count = len(etf_stock_df)
                    valuation_count = len(stock_valuation_df)

                    self.logger.info(
                        f'etf:{entity.id} date:{date} stock count: {stock_count},'
                        f'valuation count:{valuation_count}')

                    pct = abs(stock_count - valuation_count) / stock_count

                    if pct >= 0.2:
                        self.logger.error(
                            f'ignore etf:{entity.id}  date:{date} pct:{pct}')
                        break

                    se = pd.Series({
                        'id': "{}_{}".format(entity.id, date),
                        'entity_id': entity.id,
                        'timestamp': date,
                        'code': entity.code,
                        'name': entity.name
                    })
                    for col in ['pe', 'pe_ttm', 'pb', 'ps', 'pcf']:
                        # PE=P/E
                        # approach: treat each price as its PE, so earning is 1 (-1 when losing money); result = total price (PE) / total earning

                        value = 0
                        price = 0

                        # weight-based valuation
                        positive_df = stock_valuation_df[[
                            col
                        ]][stock_valuation_df[col] > 0]
                        positive_df['count'] = 1
                        positive_df = positive_df.multiply(
                            etf_stock_df["proportion"], axis="index")
                        if pd_is_not_null(positive_df):
                            value = positive_df['count'].sum()
                            price = positive_df[col].sum()

                        negative_df = stock_valuation_df[[
                            col
                        ]][stock_valuation_df[col] < 0]
                        if pd_is_not_null(negative_df):
                            negative_df['count'] = 1
                            negative_df = negative_df.multiply(
                                etf_stock_df["proportion"], axis="index")
                            value = value - negative_df['count'].sum()
                            price = price + negative_df[col].sum()

                        se[f'{col}1'] = price / value

                        # simple arithmetic-mean valuation
                        positive_df = stock_valuation_df[col][
                            stock_valuation_df[col] > 0]
                        positive_count = len(positive_df)

                        negative_df = stock_valuation_df[col][
                            stock_valuation_df[col] < 0]
                        negative_count = len(negative_df)

                        value = positive_count - negative_count
                        price = positive_df.sum() + abs(negative_df.sum())

                        se[col] = price / value
                    df = se.to_frame().T

                    self.logger.info(df)

                    df_to_db(df=df,
                             data_schema=self.data_schema,
                             provider=self.provider,
                             force_update=self.force_update)

        return None
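Note: the weighted branch above prices each constituent at its PE and counts its earning as +proportion (or -proportion for loss-makers), so the aggregate is total price / total earning. A toy calculation with invented numbers:

import pandas as pd

pe = pd.Series([10.0, 20.0, -5.0])       # constituent PEs
proportion = pd.Series([0.5, 0.3, 0.2])  # ETF weights

price = (pe * proportion).sum()                              # 5.0 + 6.0 - 1.0 = 10.0
value = proportion[pe > 0].sum() - proportion[pe < 0].sum()  # 0.8 - 0.2 = 0.6
weighted_pe = price / value                                  # ~16.67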
Example #28
    def __init__(self,
                 data_schema: Mixin,
                 entity_schema: EntityMixin,
                 provider: str = None,
                 entity_provider: str = None,
                 entity_ids: List[str] = None,
                 exchanges: List[str] = None,
                 codes: List[str] = None,
                 the_timestamp: Union[str, pd.Timestamp] = None,
                 start_timestamp: Union[str, pd.Timestamp] = None,
                 end_timestamp: Union[str, pd.Timestamp] = now_pd_timestamp(),
                 columns: List = None,
                 filters: List = None,
                 order: object = None,
                 limit: int = None,
                 level: IntervalLevel = IntervalLevel.LEVEL_1DAY,
                 category_field: str = 'entity_id',
                 time_field: str = 'timestamp',
                 computing_window: int = None) -> None:
        self.logger = logging.getLogger(self.__class__.__name__)

        self.data_schema = data_schema
        self.entity_schema = entity_schema

        self.provider = provider
        self.entity_provider = entity_provider

        self.the_timestamp = the_timestamp
        if the_timestamp:
            self.start_timestamp = the_timestamp
            self.end_timestamp = the_timestamp
        else:
            self.start_timestamp = start_timestamp
            self.end_timestamp = end_timestamp

        self.start_timestamp = to_pd_timestamp(self.start_timestamp)
        self.end_timestamp = to_pd_timestamp(self.end_timestamp)

        self.exchanges = exchanges

        if codes:
            if type(codes) == str:
                codes = codes.replace(' ', '')
                if codes.startswith('[') and codes.endswith(']'):
                    codes = json.loads(codes)
                else:
                    codes = codes.split(',')

        self.codes = codes
        self.entity_ids = entity_ids

        # convert to standard entity_ids
        if entity_schema and not self.entity_ids:
            df = get_entities(entity_schema=entity_schema, provider=self.entity_provider,
                              exchanges=self.exchanges, codes=self.codes)
            if pd_is_not_null(df):
                self.entity_ids = df['entity_id'].to_list()

        self.filters = filters
        self.order = order
        self.limit = limit

        if level:
            self.level = IntervalLevel(level)
        else:
            self.level = level

        self.category_field = category_field
        self.time_field = time_field
        self.computing_window = computing_window

        self.category_col = getattr(self.data_schema, self.category_field)
        self.time_col = getattr(self.data_schema, self.time_field)

        self.columns = columns

        # we store the data in a MultiIndex (category_column, timestamp) DataFrame
        if self.columns:
            # support str
            if type(columns[0]) == str:
                self.columns = [getattr(data_schema, col) for col in columns]

            # always add category_column and time_field for normalizing
            self.columns = list(set(self.columns) | {self.category_col, self.time_col})

        self.data_listeners: List[DataListener] = []

        self.data_df: pd.DataFrame = None

        self.load_data()
Example #29
    def move_on(self,
                to_timestamp: Union[str, pd.Timestamp] = None,
                timeout: int = 20) -> object:
        """
        keep fetching data continually in realtime
        1) get the data that happened before to_timestamp; if not set, fetch everything up to now
        2) if computing_window is set, data_df is cut to save memory


        :param to_timestamp:
        :type to_timestamp:
        :param timeout:
        :type timeout: int
        :return:
        :rtype:
        """
        if not pd_is_not_null(self.data_df):
            self.load_data()
            return

        start_time = time.time()

        # FIXME: we assume the history data is already there at first
        has_got = []
        dfs = []
        changed = False
        while True:
            for entity_id, df in self.data_df.groupby(level=0):
                if entity_id in has_got:
                    continue

                recorded_timestamp = df['timestamp'].max()

                # move_on means earlier data has been processed, so only the computing_window portion needs to be kept
                if self.computing_window:
                    df = df.iloc[-self.computing_window:]

                added_filter = [
                    self.category_col == entity_id,
                    self.time_col > recorded_timestamp
                ]
                if self.filters:
                    filters = self.filters + added_filter
                else:
                    filters = added_filter

                added_df = self.data_schema.query_data(
                    provider=self.provider,
                    columns=self.columns,
                    end_timestamp=to_timestamp,
                    filters=filters,
                    level=self.level,
                    index=[self.category_field, self.time_field])

                if pd_is_not_null(added_df):
                    self.logger.info('entity_id:{},added:\n{}'.format(
                        entity_id, added_df))

                    for listener in self.data_listeners:
                        listener.on_entity_data_changed(entity=entity_id,
                                                        added_data=added_df)
                    # if got data,just move to another entity_id
                    changed = True
                    has_got.append(entity_id)
                    df = df.append(added_df, sort=False)
                    dfs.append(df)
                else:
                    cost_time = time.time() - start_time
                    if cost_time > timeout:
                        # if timeout,just add the old data
                        has_got.append(entity_id)
                        dfs.append(df)
                        self.logger.warning(
                            'category:{} level:{} getting data timeout,to_timestamp:{},now:{}'
                            .format(entity_id, self.level, to_timestamp,
                                    now_pd_timestamp()))
                        continue

            if len(has_got) == len(self.data_df.index.levels[0]):
                break

        if dfs:
            self.data_df = pd.concat(dfs, sort=False)
            self.data_df = self.data_df.sort_index(level=[0, 1])

            if changed:
                for listener in self.data_listeners:
                    listener.on_data_changed(self.data_df)
Example #30
    def record(self, entity, start, end, size, timestamps):
        if not end:
            end = now_pd_timestamp()

        date_range = pd.date_range(start=start, end=end, freq='1D').tolist()
        for date in date_range:
            # constituent stocks of the ETF and their weights
            etf_stock_df = get_etf_stocks(code=entity.code,
                                          timestamp=date,
                                          provider=self.provider)

            if pd_is_not_null(etf_stock_df):
                all_pct = etf_stock_df['proportion'].sum()

                if all_pct >= 1.1 or all_pct <= 0.9:
                    self.logger.info(
                        f'etf:{entity.id}  date:{date} proportion sum:{all_pct}')

                etf_stock_df.set_index('stock_id', inplace=True)

                # valuation data for the constituent stocks
                stock_valuation_df = StockValuation.query_data(
                    entity_ids=etf_stock_df.index.to_list(),
                    filters=[StockValuation.timestamp == date],
                    index='entity_id')

                if pd_is_not_null(stock_valuation_df):
                    # for now only the simple arithmetic-mean valuation is supported;
                    # rationale: being vaguely right beats being precisely wrong
                    # A-share market caps vary hugely, so a cap-weighted valuation hardly reflects the whole basket
                    self.logger.info(
                        f'etf:{entity.id} date:{date} stock count: {len(etf_stock_df)},valuation count:{len(stock_valuation_df)}'
                    )

                    #     # static PE
                    #     pe = Column(Float)
                    #     # trailing (TTM) PE
                    #     pe_ttm = Column(Float)
                    #     # price-to-book ratio
                    #     pb = Column(Float)
                    #     # price-to-sales ratio
                    #     ps = Column(Float)
                    #     # price-to-cash-flow ratio
                    #     pcf = Column(Float)

                    se = pd.Series({
                        'id': "{}_{}".format(entity.id, date),
                        'entity_id': entity.id,
                        'timestamp': date,
                        'code': entity.code,
                        'name': entity.name
                    })
                    for col in ['pe', 'pe_ttm', 'pb', 'ps', 'pcf']:
                        # PE=P/E
                        # approach: set each price to 1, compute the total earning, then divide
                        positive_df = stock_valuation_df[col][
                            stock_valuation_df[col] > 0]
                        positive_count = len(positive_df)

                        negative_df = stock_valuation_df[col][
                            stock_valuation_df[col] < 0]
                        negative_count = len(negative_df)

                        result = (positive_count + negative_count) / (
                            positive_count / positive_df.mean() +
                            negative_count / negative_df.mean())

                        se[col] = result
                    df = se.to_frame().T

                    self.logger.info(df)

                    df_to_db(df=df,
                             data_schema=self.data_schema,
                             provider=self.provider,
                             force_update=self.force_update)

        return None
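Note: the arithmetic-mean branch prices every constituent at 1 and approximates each side's total earning as count / mean(pe); the two are equal only when all constituents on that side share the same pe. A toy run of the same formula (values invented):

import pandas as pd

positive = pd.Series([10.0, 20.0])  # profitable constituents, pe > 0
negative = pd.Series([-5.0])        # loss-making constituent, pe < 0

p, n = len(positive), len(negative)
result = (p + n) / (p / positive.mean() + n / negative.mean())
# (2 + 1) / (2/15 + 1/(-5)) = 3 / (-0.0667) = -45.0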