Example #1
def update_factor_details(factor, entity_type, entity, levels, columns,
                          trader_index, schema_name):
    if factor and entity_type and entity and levels:
        sub_df = None
        # add sub graph
        if columns:
            if isinstance(columns, str):
                columns = [columns]
            columns = columns + ['entity_id', 'timestamp']
            schema: Mixin = get_schema_by_name(name=schema_name)
            sub_df = schema.query_data(entity_id=entity, columns=columns)

        # add trading signals as annotation
        annotation_df = None
        if trader_index is not None:
            order_reader = order_readers[trader_index]
            annotation_df = order_reader.data_df.copy()
            annotation_df = annotation_df[annotation_df.entity_id ==
                                          entity].copy()
            if pd_is_not_null(annotation_df):
                annotation_df['value'] = annotation_df['order_price']
                annotation_df['flag'] = annotation_df['order_type'].apply(
                    lambda x: order_type_flag(x))
                annotation_df['color'] = annotation_df['order_type'].apply(
                    lambda x: order_type_color(x))
            print(annotation_df.tail())

        if isinstance(levels, list) and len(levels) >= 2:
            levels.sort()
            drawers = []
            for level in levels:
                drawers.append(zvt_context.factor_cls_registry[factor](
                    entity_schema=zvt_context.entity_schema_map[entity_type],
                    level=level,
                    entity_ids=[entity]).drawer())
            stacked = StackedDrawer(*drawers)

            return dcc.Graph(id=f'{factor}-{entity_type}-{entity}',
                             figure=stacked.draw_kline(show=False, height=900))
        else:
            if isinstance(levels, list):
                level = levels[0]
            else:
                level = levels
            drawer = zvt_context.factor_cls_registry[factor](
                entity_schema=zvt_context.entity_schema_map[entity_type],
                level=level,
                entity_ids=[entity],
                need_persist=False).drawer()
            if pd_is_not_null(sub_df):
                drawer.add_sub_df(sub_df)
            if pd_is_not_null(annotation_df):
                drawer.annotation_df = annotation_df

            return dcc.Graph(id=f'{factor}-{entity_type}-{entity}',
                             figure=drawer.draw_kline(show=False, height=800))
    raise dash.PreventUpdate()
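In zvt's dash app a function like this serves as a callback body: the selector values arrive as Inputs and the returned dcc.Graph replaces a container's children. A minimal wiring sketch, assuming hypothetical component ids ('factor-selector' etc. are placeholders, not zvt's actual layout ids):

import dash
from dash.dependencies import Input, Output

app = dash.Dash(__name__)

@app.callback(
    Output('factor-details', 'children'),
    [Input('factor-selector', 'value'),
     Input('entity-type-selector', 'value'),
     Input('entity-selector', 'value'),
     Input('levels-selector', 'value')])
def _update(factor, entity_type, entity, levels):
    # delegate to update_factor_details above; the remaining arguments would
    # come from additional Inputs in the real layout
    return update_factor_details(factor, entity_type, entity, levels,
                                 columns=None, trader_index=None,
                                 schema_name=None)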
Example #2
    def on_time(self, timestamp: pd.Timestamp):
        recent_report_date = to_pd_timestamp(get_recent_report_date(timestamp))
        if self.finish_date and is_same_date(recent_report_date,
                                             self.finish_date):
            return
        filters = [
            StockActorSummary.actor_type == ActorType.raised_fund.value,
            StockActorSummary.report_date == recent_report_date
        ]

        if self.entity_ids:
            filters = filters + [
                StockActorSummary.entity_id.in_(self.entity_ids)
            ]

        df = StockActorSummary.query_data(filters=filters)

        if not pd_is_not_null(df):
            # nothing to trade on; df[...] below would fail on an empty result
            return
        self.logger.info(f'{df}')
        self.finish_date = recent_report_date

        long_df = df[df['change_ratio'] > 0.05]
        short_df = df[df['change_ratio'] < -0.5]
        try:
            self.trade_the_targets(
                due_timestamp=timestamp,
                happen_timestamp=timestamp,
                long_selected=set(long_df['entity_id'].to_list()),
                short_selected=set(short_df['entity_id'].to_list()))
        except Exception as e:
            self.logger.error(e)
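The long/short selection is plain boolean filtering on change_ratio, with deliberately asymmetric thresholds (long above +5%, short below -50%). A self-contained illustration of the same filter:

import pandas as pd

df = pd.DataFrame({
    'entity_id': ['stock_sz_000001', 'stock_sz_000002', 'stock_sh_600000'],
    'change_ratio': [0.08, -0.6, 0.01],
})
long_selected = set(df[df['change_ratio'] > 0.05]['entity_id'])
short_selected = set(df[df['change_ratio'] < -0.5]['entity_id'])
print(long_selected)   # {'stock_sz_000001'}
print(short_selected)  # {'stock_sz_000002'}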
Example #3
    def init_entities(self):
        """
        init the entities for which we would record data

        """
        if self.entity_provider == self.provider and self.entity_schema == self.data_schema:
            self.entity_session = self.session
        else:
            self.entity_session = get_db_session(provider=self.entity_provider, data_schema=self.entity_schema)

        filters = None
        if self.day_data:
            df = self.data_schema.query_data(start_timestamp=now_time_str(), columns=['entity_id', 'timestamp'],
                                             provider=self.provider)
            if pd_is_not_null(df):
                entity_ids = df['entity_id'].tolist()
                self.logger.info(f'ignore entity_ids:{entity_ids}')
                filters = [self.entity_schema.entity_id.notin_(entity_ids)]

        # init the entity list
        self.entities = get_entities(session=self.entity_session,
                                     entity_schema=self.entity_schema,
                                     entity_type=self.entity_type,
                                     exchanges=self.exchanges,
                                     entity_ids=self.entity_ids,
                                     codes=self.codes,
                                     return_type='domain',
                                     provider=self.entity_provider,
                                     filters=filters)
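The filters handed to get_entities are ordinary SQLAlchemy column expressions, so excluding the already-recorded entities is a single notin_ clause. A minimal sketch with a toy declarative model (illustrative only, not a zvt schema):

from sqlalchemy import Column, String
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class Entity(Base):
    __tablename__ = 'entity'
    entity_id = Column(String, primary_key=True)

recorded = ['stock_sz_000001', 'stock_sh_600000']
filters = [Entity.entity_id.notin_(recorded)]
# such a list can then be applied with session.query(Entity).filter(*filters)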
Example #4
    def tag(self, timestamp):
        df = get_recent_report(
            data_schema=StockActorSummary,
            timestamp=timestamp,
            filters=[
                StockActorSummary.actor_type == ActorType.raised_fund.value
            ],
        )
        if not pd_is_not_null(df):
            logger.error(f"no StockActorSummary data at {timestamp}")
            return

        df = df.set_index("entity_id")

        fund_love_ids = df[(df["holding_ratio"] >= 0.05)
                           & (df["change_ratio"] >= -0.3)].index.tolist()
        fund_not_care_ids = df[(df["holding_ratio"] < 0.05) |
                               (df["change_ratio"] < -0.3)].index.tolist()

        fund_love_domains = self.get_tag_domains(
            entity_ids=fund_love_ids,
            timestamp=timestamp,
            actor_tag=ActorTag.fund_love.value)
        fund_not_care_domains = self.get_tag_domains(
            entity_ids=fund_not_care_ids,
            timestamp=timestamp,
            actor_tag=ActorTag.fund_not_care.value)
        self.session.add_all(fund_love_domains)
        self.session.add_all(fund_not_care_domains)
        self.session.commit()
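fund_love and fund_not_care partition the index exactly, because the second mask is the De Morgan negation of the first: not (a >= x and b >= y) is (a < x) or (b < y). A quick self-contained check of that split:

import pandas as pd

df = pd.DataFrame({
    'holding_ratio': [0.06, 0.04, 0.10],
    'change_ratio': [-0.1, 0.2, -0.5],
}, index=['s1', 's2', 's3'])

love = df[(df['holding_ratio'] >= 0.05) & (df['change_ratio'] >= -0.3)].index
not_care = df[(df['holding_ratio'] < 0.05) | (df['change_ratio'] < -0.3)].index
assert set(love) | set(not_care) == set(df.index)
assert not set(love) & set(not_care)
print(list(love), list(not_care))  # ['s1'] ['s2', 's3']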
Example #5
def get_performance_stats(
    entity_type="stock",
    start_timestamp=None,
    end_timestamp=None,
    adjust_type: Union[AdjustType, str] = None,
    data_provider=None,
    changes=((-1, -0.5), (-0.5, -0.2), (-0.2, 0), (0, 0.2), (0.2, 0.5),
             (0.5, 1), (1, 1000)),
):
    if not adjust_type:
        adjust_type = default_adjust_type(entity_type=entity_type)
    data_schema = get_kdata_schema(entity_type=entity_type,
                                   adjust_type=adjust_type)

    score_df, _ = get_top_entities(
        data_schema=data_schema,
        column="close",
        start_timestamp=start_timestamp,
        end_timestamp=end_timestamp,
        pct=1,
        method=WindowMethod.change,
        return_type=TopType.positive,
        data_provider=data_provider,
    )

    if pd_is_not_null(score_df):
        result = {}
        for change in changes:
            range_start = change[0]
            range_end = change[1]
            key = f"pct_{range_start}_{range_end}"
            df = score_df[(score_df["score"] >= range_start)
                          & (score_df["score"] < range_end)]
            result[key] = len(df)
        return result
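The loop buckets scores into half-open [start, end) ranges; pd.cut with right=False produces the same intervals, so the counting can be expressed as one call. A sketch:

import pandas as pd

scores = pd.Series([-0.7, -0.1, 0.1, 0.3, 1.5])
bins = [-1, -0.5, -0.2, 0, 0.2, 0.5, 1, 1000]
# right=False makes the bins left-closed/right-open, matching >= start and < end
counts = pd.cut(scores, bins=bins, right=False).value_counts(sort=False)
print(counts)  # one score in each of [-1,-0.5), [-0.2,0), [0,0.2), [0.2,0.5), [1,1000)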
Example #6
    def record(self, entity, start, end, size, timestamps):
        all_df = None
        # Shanghai Composite Index
        if entity.code == '000001':
            all_df = StockMoneyFlow.query_data(
                provider=self.provider,
                start_timestamp=start,
                filters=[StockMoneyFlow.entity_id.like('stock_sh%')])
        # Shenzhen Component Index
        elif entity.code == '399001':
            all_df = StockMoneyFlow.query_data(
                provider=self.provider,
                start_timestamp=start,
                filters=[StockMoneyFlow.entity_id.like('stock_sz%')])
        # ChiNext Index
        elif entity.code == '399006':
            all_df = StockMoneyFlow.query_data(
                provider=self.provider,
                start_timestamp=start,
                filters=[StockMoneyFlow.code.like('300%')])

        if pd_is_not_null(all_df):
            g = all_df.groupby('timestamp')
            for timestamp, df in g:
                se = pd.Series({
                    'id': "{}_{}".format(entity.id, to_time_str(timestamp)),
                    'entity_id': entity.id,
                    'timestamp': timestamp,
                    'code': entity.code,
                    'name': entity.name
                })
                for col in [
                        'net_main_inflows', 'net_huge_inflows',
                        'net_big_inflows', 'net_medium_inflows',
                        'net_small_inflows'
                ]:
                    se[col] = df[col].sum()

                for col in [
                        'net_main_inflow_rate', 'net_huge_inflow_rate',
                        'net_big_inflow_rate', 'net_medium_inflow_rate',
                        'net_small_inflow_rate'
                ]:
                    se[col] = df[col].sum() / len(df)

                index_df = se.to_frame().T

                self.logger.info(index_df)

                df_to_db(df=index_df,
                         data_schema=self.data_schema,
                         provider=self.provider,
                         force_update=self.force_update)

        return None
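Within each timestamp group, df[col].sum() / len(df) is simply the group mean, so both column loops collapse into a single groupby aggregation. An equivalent sketch on toy data:

import pandas as pd

flows = pd.DataFrame({
    'timestamp': ['2021-01-04', '2021-01-04', '2021-01-05'],
    'net_main_inflows': [100.0, 50.0, 30.0],
    'net_main_inflow_rate': [0.02, 0.04, 0.01],
})
agg = flows.groupby('timestamp').agg(
    net_main_inflows=('net_main_inflows', 'sum'),
    net_main_inflow_rate=('net_main_inflow_rate', 'mean'),
)
print(agg)  # 2021-01-04: 150.0 inflows at rate 0.03; 2021-01-05: 30.0 at 0.01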
Example #7
    def acc_one(self, entity_id, df: pd.DataFrame, acc_df: pd.DataFrame,
                state: dict) -> (pd.DataFrame, dict):
        self.logger.info(f'acc_one:{entity_id}')
        if pd_is_not_null(acc_df):
            df = df[df.index > acc_df.index[-1]]
            if pd_is_not_null(df):
                self.logger.info(f'compute from {df.iloc[0]["timestamp"]}')
                acc_df = pd.concat([acc_df, df])
            else:
                self.logger.info('no need to compute')
                return acc_df, state
        else:
            acc_df = df

        for window in self.windows:
            col = 'ma{}'.format(window)
            self.indicators.append(col)

            ma_df = acc_df['close'].rolling(window=window,
                                            min_periods=window).mean()
            acc_df[col] = ma_df

        acc_df['live'] = (acc_df['ma5'] >
                          acc_df['ma10']).apply(lambda x: live_or_dead(x))
        acc_df['distance'] = (acc_df['ma5'] - acc_df['ma10']) / acc_df['close']

        live = acc_df['live']
        acc_df['count'] = live * (live.groupby(
            (live != live.shift()).cumsum()).cumcount() + 1)

        acc_df['bulk'] = (live != live.shift()).cumsum()
        area_df = acc_df[['distance', 'bulk']]
        acc_df['area'] = area_df.groupby('bulk').cumsum()

        for vol_window in self.vol_windows:
            col = 'vol_ma{}'.format(vol_window)
            self.indicators.append(col)

            vol_ma_df = acc_df['turnover'].rolling(
                window=vol_window, min_periods=vol_window).mean()
            acc_df[col] = vol_ma_df

        acc_df = acc_df.set_index('timestamp', drop=False)
        return acc_df, state
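The count column relies on the shift/cumsum run-id trick: (live != live.shift()).cumsum() gives every maximal run of equal values its own id, cumcount() + 1 numbers the positions inside each run, and multiplying by live zeroes the count outside live runs. A self-contained demonstration, assuming for illustration that live_or_dead encodes bars as 1/0 (with a 1/-1 encoding, dead runs would get negative counts instead):

import pandas as pd

live = pd.Series([1, 1, 0, 1, 1, 1, 0])
run_id = (live != live.shift()).cumsum()           # 1 1 2 3 3 3 4
count = live * (live.groupby(run_id).cumcount() + 1)
print(count.tolist())                              # [1, 2, 0, 1, 2, 3, 0]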
Example #8
    def record(self, entity, start, end, size, timestamps):
        df = get_kdata(entity_id=entity.id,
                       limit=size,
                       adjust_type=self.adjust_type)
        if pd_is_not_null(df):
            df_to_db(df=df,
                     data_schema=self.data_schema,
                     provider=self.provider,
                     force_update=self.force_update)
        else:
            self.logger.info(f'no kdata for {entity.id}')
Example #9
def get_recent_report(data_schema: Type[Mixin], timestamp, entity_id=None, filters=None, max_step=2):
    i = 0
    while i < max_step:
        report_date = get_recent_report_date(the_date=timestamp, step=i)
        # build this step's filters without accumulating report_date conditions
        # across iterations (rebinding filters each pass would leave the stale
        # report_date clause in place, so later steps could never match)
        if filters:
            step_filters = filters + [data_schema.report_date == to_pd_timestamp(report_date)]
        else:
            step_filters = [data_schema.report_date == to_pd_timestamp(report_date)]
        df = data_schema.query_data(entity_id=entity_id, filters=step_filters)
        if pd_is_not_null(df):
            return df
        i = i + 1
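get_recent_report_date(the_date, step=i) steps back one report period per iteration, so the loop tries the most recent report date first and keeps falling back until data appears or max_step is exhausted; if nothing is found it implicitly returns None. A hedged usage sketch (the import paths are the usual zvt locations, but verify them against your zvt version):

from zvt.domain import StockActorSummary
from zvt.contract import ActorType

df = get_recent_report(
    data_schema=StockActorSummary,
    timestamp='2021-06-30',
    filters=[StockActorSummary.actor_type == ActorType.raised_fund.value],
    max_step=2,
)
if df is None:
    print('no report data within the last two report periods')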
Example #10
    def drawer_annotation_df(self) -> Optional[pd.DataFrame]:
        def order_type_flag(df):
            return "<br>".join(df.tolist())

        if pd_is_not_null(self.player_df):
            annotation_df = self.player_df.copy()
            annotation_df["value"] = self.factor_df.loc[
                annotation_df.index]["close"]
            annotation_df["flag"] = annotation_df[[
                "dep1", "dep2", "dep3", "dep4", "dep5"
            ]].apply(lambda x: order_type_flag(x), axis=1)
            annotation_df["color"] = "#ff7f0e"
            return annotation_df
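The value column is filled by label-based alignment: looking up annotation_df's index in factor_df returns the close prices in matching order. A tiny self-contained demonstration of that alignment:

import pandas as pd

factor_df = pd.DataFrame({'close': [10.0, 11.0, 12.0]},
                         index=pd.to_datetime(['2021-01-04', '2021-01-05', '2021-01-06']))
player_df = pd.DataFrame({'dep1': ['fund_a', 'fund_b']},
                         index=pd.to_datetime(['2021-01-04', '2021-01-06']))

annotation = player_df.copy()
annotation['value'] = factor_df.loc[annotation.index, 'close']
print(annotation['value'].tolist())  # [10.0, 12.0]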
Example #11
    def acc_one(self, entity_id, df: pd.DataFrame, acc_df: pd.DataFrame, state: dict) -> (pd.DataFrame, dict):
        self.logger.info(f"acc_one:{entity_id}")
        if pd_is_not_null(acc_df):
            df = df[df.index > acc_df.index[-1]]
            if pd_is_not_null(df):
                self.logger.info(f'compute from {df.iloc[0]["timestamp"]}')
                acc_df = pd.concat([acc_df, df])
            else:
                self.logger.info("no need to compute")
                return acc_df, state
        else:
            acc_df = df

        for window in self.windows:
            col = "ma{}".format(window)
            self.indicators.append(col)

            ma_df = acc_df["close"].rolling(window=window, min_periods=window).mean()
            acc_df[col] = ma_df

        acc_df["live"] = (acc_df["ma5"] > acc_df["ma10"]).apply(lambda x: live_or_dead(x))
        acc_df["distance"] = (acc_df["ma5"] - acc_df["ma10"]) / acc_df["close"]

        live = acc_df["live"]
        acc_df["count"] = live * (live.groupby((live != live.shift()).cumsum()).cumcount() + 1)

        acc_df["bulk"] = (live != live.shift()).cumsum()
        area_df = acc_df[["distance", "bulk"]]
        acc_df["area"] = area_df.groupby("bulk").cumsum()

        for vol_window in self.vol_windows:
            col = "vol_ma{}".format(vol_window)
            self.indicators.append(col)

            vol_ma_df = acc_df["turnover"].rolling(window=vol_window, min_periods=vol_window).mean()
            acc_df[col] = vol_ma_df

        acc_df = acc_df.set_index("timestamp", drop=False)
        return acc_df, state
Example #12
def show_month_performance():
    dfs = []
    for timestamp, df in get_top_performance_by_month(start_timestamp='2005-01-01', list_days=250):
        if pd_is_not_null(df):
            df = df.reset_index(drop=True)
            df['entity_id'] = 'stock_cn_performance'
            df['timestamp'] = timestamp
            dfs.append(df)

    all_df = pd.concat(dfs)
    print(all_df)

    drawer = Drawer(main_df=all_df)
    drawer.draw_scatter(show=True)
Example #13
    def tag(self, timestamp):
        for index_id in index_map_market_value:
            df = IndexStock.query_data(entity_id=index_id, start_timestamp=month_start_date(timestamp),
                                       end_timestamp=month_end_date(timestamp))
            if not pd_is_not_null(df):
                logger.error(f'no IndexStock data at {timestamp} for {index_id}')
                continue
            stock_tags = [self.get_tag_domain(entity_id=stock_id, timestamp=timestamp) for stock_id in
                          df['stock_id'].tolist()]

            for stock_tag in stock_tags:
                stock_tag.market_value_tag = index_map_market_value.get(index_id).value

            self.session.add_all(stock_tags)
            self.session.commit()
Example #14
def get_entity_list_by_cap(timestamp,
                           cap_start,
                           cap_end,
                           entity_type="stock",
                           provider=None,
                           adjust_type=None,
                           retry_times=20):
    if not adjust_type:
        adjust_type = default_adjust_type(entity_type=entity_type)

    kdata_schema = get_kdata_schema(entity_type,
                                    level=IntervalLevel.LEVEL_1DAY,
                                    adjust_type=adjust_type)
    df = kdata_schema.query_data(
        provider=provider,
        filters=[kdata_schema.timestamp == to_pd_timestamp(timestamp)],
        index="entity_id",
    )
    if pd_is_not_null(df):
        df["cap"] = df["turnover"] / df["turnover_rate"]
        df_result = df.copy()
        if cap_start:
            df_result = df_result.loc[(df["cap"] >= cap_start)]
        if cap_end:
            df_result = df_result.loc[(df["cap"] <= cap_end)]
        return df_result.index.tolist()
    else:
        if retry_times == 0:
            return []
        return get_entity_list_by_cap(
            timestamp=next_date(timestamp, 1),
            cap_start=cap_start,
            cap_end=cap_end,
            entity_type=entity_type,
            provider=provider,
            adjust_type=adjust_type,
            retry_times=retry_times - 1,
        )
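The cap column inverts the turnover-rate definition: if turnover_rate = turnover / market_cap, then cap = turnover / turnover_rate (a float-based cap when the rate is quoted against tradable shares). A one-line sanity check:

# 50M of turnover at a 0.5% turnover rate implies a 10B (tradable) market cap
turnover, turnover_rate = 5e7, 0.005
assert turnover / turnover_rate == 1e10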
Example #15
    def init_entities(self):
        """
        init the entities for which we would record data

        """
        if self.entity_provider == self.provider and self.entity_schema == self.data_schema:
            self.entity_session = self.session
        else:
            self.entity_session = get_db_session(
                provider=self.entity_provider, data_schema=self.entity_schema)

        if self.day_data:
            df = self.data_schema.query_data(
                start_timestamp=now_time_str(),
                columns=["entity_id", "timestamp"],
                provider=self.provider)
            if pd_is_not_null(df):
                entity_ids = df["entity_id"].tolist()
                self.logger.info(f"ignore entity_ids:{entity_ids}")
                if self.entity_filters:
                    self.entity_filters.append(
                        self.entity_schema.entity_id.notin_(entity_ids))
                else:
                    self.entity_filters = [
                        self.entity_schema.entity_id.notin_(entity_ids)
                    ]

        #: init the entity list
        self.entities = get_entities(
            session=self.entity_session,
            entity_schema=self.entity_schema,
            exchanges=self.exchanges,
            entity_ids=self.entity_ids,
            codes=self.codes,
            return_type="domain",
            provider=self.entity_provider,
            filters=self.entity_filters,
        )
Example #16
def update_factor_details(factor, entity_type, code, levels, columns,
                          schema_name):
    if factor and entity_type and code and levels:
        sub_df = None
        if columns:
            if isinstance(columns, str):
                columns = [columns]
            columns = columns + ['entity_id', 'timestamp']
            schema: Mixin = get_schema_by_name(name=schema_name)
            sub_df = schema.query_data(code=code, columns=columns)
        if isinstance(levels, list) and len(levels) >= 2:
            levels.sort()
            drawers = []
            for level in levels:
                drawers.append(zvt_context.factor_cls_registry[factor](
                    entity_schema=zvt_context.entity_schema_map[entity_type],
                    level=level,
                    codes=[code]).drawer())
            stacked = StackedDrawer(*drawers)

            return dcc.Graph(id=f'{factor}-{entity_type}-{code}',
                             figure=stacked.draw_kline(show=False, height=900))
        else:
            if isinstance(levels, list):
                level = levels[0]
            else:
                level = levels
            drawer = zvt_context.factor_cls_registry[factor](
                entity_schema=zvt_context.entity_schema_map[entity_type],
                level=level,
                codes=[code],
                need_persist=False).drawer()
            if pd_is_not_null(sub_df):
                drawer.add_sub_df(sub_df)

            return dcc.Graph(id=f'{factor}-{entity_type}-{code}',
                             figure=drawer.draw_kline(show=False, height=800))
    raise dash.PreventUpdate()
Example #17
    s = block_df["name"].value_counts()

    cycle_df = pd.DataFrame(columns=s.index, data=[s.tolist()])
    cycle_df["entity_id"] = "stock_cn_industry"
    cycle_df["timestamp"] = timestamp
    drawer = Drawer(main_df=cycle_df)
    drawer.draw_pie(show=True)


if __name__ == "__main__":
    df = get_performance_stats_by_month()
    print(df)
    dfs = []
    for timestamp, _, df in get_top_performance_by_month(
            start_timestamp="2012-01-01", list_days=250):
        if pd_is_not_null(df):
            entity_ids = df.index.tolist()
            the_date = pre_month_end_date(timestamp)
            show_industry_composition(entity_ids=entity_ids,
                                      timestamp=timestamp)
    for entity_id in df.index:
        from zvt.utils.time_utils import month_end_date, pre_month_start_date

        end_date = month_end_date(pre_month_start_date(timestamp))
        TechnicalFactor(entity_ids=[entity_id],
                        end_timestamp=end_date).draw(show=True)

# the __all__ is generated
__all__ = [
    "WindowMethod",
    "TopType",
Example #18
    def acc_one(self, entity_id, df: pd.DataFrame, acc_df: pd.DataFrame,
                state: dict) -> (pd.DataFrame, dict):
        self.logger.info(f"acc_one:{entity_id}")
        if pd_is_not_null(acc_df):
            df = df[df.index > acc_df.index[-1]]
            if pd_is_not_null(df):
                self.logger.info(f'compute from {df.iloc[0]["timestamp"]}')
                # start position for the iteration below
                start_index = len(acc_df)

                acc_df = pd.concat([acc_df, df])

                zen_state = ZState(state)

                acc_df = acc_df.reset_index(drop=True)
                current_interval = acc_df.iloc[start_index -
                                               1]["current_interval"]
            else:
                self.logger.info("no need to compute")
                return acc_df, state
        else:
            acc_df = df
            # bottom of a bi (stroke)
            acc_df["bi_di"] = False
            # top of a bi (stroke)
            acc_df["bi_ding"] = False
            # value at a bi top/bottom fenxing: low for bi_di, high for bi_ding, otherwise None;
            # when drawing, connecting the non-null values yields the bi
            acc_df["bi_value"] = np.NAN
            # change of the bi
            acc_df["bi_change"] = np.NAN
            # slope of the bi
            acc_df["bi_slope"] = np.NAN
            # duration in periods
            acc_df["bi_interval"] = np.NAN

            # temporary fenxing markers, never changed afterwards
            acc_df["tmp_ding"] = False
            acc_df["tmp_di"] = False
            # strength of the fenxing
            acc_df["fenxing_power"] = np.NAN

            # direction confirmed by the current fenxing
            acc_df["current_direction"] = None
            acc_df["current_change"] = np.NAN
            acc_df["current_interval"] = np.NAN
            acc_df["current_slope"] = np.NAN
            # the most recent bi zhongshu (pivot range)
            # acc_df['current_zhongshu'] = np.NAN
            acc_df["current_zhongshu_change"] = np.NAN
            acc_df["current_zhongshu_y0"] = np.NAN
            acc_df["current_zhongshu_y1"] = np.NAN

            # temporary direction of the current move; its relation to direction
            # determines the next fenxing
            acc_df["tmp_direction"] = None
            acc_df["opposite_change"] = np.NAN
            acc_df["opposite_interval"] = np.NAN
            acc_df["opposite_slope"] = np.NAN

            acc_df["duan_state"] = "yi"

            # bottom of a duan (segment)
            acc_df["duan_di"] = False
            # top of a duan (segment)
            acc_df["duan_ding"] = False
            # value at a duan top/bottom: low for duan_di, high for duan_ding, otherwise None;
            # when drawing, connecting the non-null values yields the duan
            acc_df["duan_value"] = np.NAN
            # change of the duan
            acc_df["duan_change"] = np.NAN
            # slope of the duan
            acc_df["duan_slope"] = np.NAN
            # duration in periods
            acc_df["duan_interval"] = np.NAN

            # recorded at the end point x1 of the last duan that confirms the zhongshu;
            # the value is a Rect(x0,y0,x1,y1)
            acc_df["zhongshu"] = None
            acc_df["zhongshu_change"] = np.NAN

            acc_df["bi_zhongshu"] = None
            acc_df["bi_zhongshu_change"] = np.NAN

            acc_df = acc_df.reset_index(drop=True)

            zen_state = ZState(
                dict(
                    fenxing_list=[],
                    direction=None,
                    can_fenxing=None,
                    can_fenxing_index=None,
                    opposite_count=0,
                    current_duan_state="yi",
                    duans=[],
                    pre_bi=None,
                    pre_duan=None,
                ))

            zen_state.fenxing_list: List[Fenxing] = []

            # take the first 11 k-lines: at most one top fenxing plus one bottom fenxing can appear
            # note: this is just a convenient way to pin down the first fenxing; once it exists,
            # the subsequent processing is uniform
            # start_index is the position where the iteration starts
            # direction is the direction after a confirmed fenxing: down after a top fenxing,
            # up after a bottom fenxing
            fenxing, start_index, direction, current_interval = handle_first_fenxing(
                acc_df, step=11)
            if not fenxing:
                return None, None

            zen_state.fenxing_list.append(fenxing)
            zen_state.direction = direction

            # list of (timestamp,value)
            zen_state.duans = []
            zen_state.bis = []

        pre_kdata = acc_df.iloc[start_index - 1]
        pre_index = start_index - 1

        tmp_direction = zen_state.direction

        current_zhongshu = None
        current_zhongshu_change = None
        for index, kdata in acc_df.iloc[start_index:].iterrows():
            # print(f'timestamp: {kdata.timestamp}')
            # temporary direction
            tmp_direction = get_direction(kdata,
                                          pre_kdata,
                                          current=tmp_direction)

            # current states
            current_interval = current_interval + 1
            if zen_state.direction == Direction.up:
                pre_value = acc_df.loc[zen_state.fenxing_list[0].index, "low"]
                current_value = kdata["high"]
            else:
                pre_value = acc_df.loc[zen_state.fenxing_list[0].index, "high"]
                current_value = kdata["low"]
            acc_df.loc[index, "current_direction"] = zen_state.direction.value
            acc_df.loc[index, "current_interval"] = current_interval
            change = (current_value - pre_value) / pre_value
            acc_df.loc[index, "current_change"] = change
            acc_df.loc[index, "current_slope"] = change / current_interval
            if current_zhongshu:
                # acc_df.loc[index, 'current_zhongshu'] = current_zhongshu
                acc_df.loc[index, "current_zhongshu_y0"] = current_zhongshu.y0
                acc_df.loc[index, "current_zhongshu_y1"] = current_zhongshu.y1
                acc_df.loc[index,
                           "current_zhongshu_change"] = current_zhongshu_change
            else:
                # acc_df.loc[index, 'current_zhongshu'] = acc_df.loc[index - 1, 'current_zhongshu']
                acc_df.loc[index, "current_zhongshu_y0"] = acc_df.loc[
                    index - 1, "current_zhongshu_y0"]
                acc_df.loc[index, "current_zhongshu_y1"] = acc_df.loc[
                    index - 1, "current_zhongshu_y1"]
                acc_df.loc[index, "current_zhongshu_change"] = acc_df.loc[
                    index - 1, "current_zhongshu_change"]

            # handle the inclusion relation (merge contained k-lines)
            handle_including(
                one_df=acc_df,
                index=index,
                kdata=kdata,
                pre_index=pre_index,
                pre_kdata=pre_kdata,
                tmp_direction=tmp_direction,
            )

            # based on the direction, look for the matching fenxing and duan
            if zen_state.direction == Direction.up:
                tmp_fenxing_col = "tmp_ding"
                fenxing_col = "bi_ding"
            else:
                tmp_fenxing_col = "tmp_di"
                fenxing_col = "bi_di"

            # same direction: the move is continuing
            if tmp_direction == zen_state.direction:
                zen_state.opposite_count = 0
            # opposite direction: look for the reversal fenxing
            else:
                zen_state.opposite_count = zen_state.opposite_count + 1

                # opposite states
                current_interval = zen_state.opposite_count
                if tmp_direction == Direction.up:
                    pre_value = acc_df.loc[index - zen_state.opposite_count,
                                           "low"]
                    current_value = kdata["high"]
                else:
                    pre_value = acc_df.loc[index - zen_state.opposite_count,
                                           "high"]
                    current_value = kdata["low"]
                acc_df.loc[index, "tmp_direction"] = tmp_direction.value
                acc_df.loc[index, "opposite_interval"] = current_interval
                change = (current_value - pre_value) / pre_value
                acc_df.loc[index, "opposite_change"] = change
                acc_df.loc[index, "opposite_slope"] = change / current_interval

                # first bar in the opposite direction
                if zen_state.opposite_count == 1:
                    acc_df.loc[pre_index, tmp_fenxing_col] = True
                    acc_df.loc[pre_index, "fenxing_power"] = fenxing_power(
                        acc_df.loc[pre_index - 1],
                        pre_kdata,
                        kdata,
                        fenxing=tmp_fenxing_col)

                    if zen_state.can_fenxing is not None:
                        # candidate bottom fenxing
                        if tmp_direction == Direction.up:
                            # take the lower one
                            if pre_kdata["low"] <= zen_state.can_fenxing["low"]:
                                zen_state.can_fenxing = pre_kdata[[
                                    "low", "high"
                                ]]
                                zen_state.can_fenxing_index = pre_index

                        # candidate top fenxing
                        else:
                            # take the higher one
                            if pre_kdata["high"] >= zen_state.can_fenxing[
                                    "high"]:
                                zen_state.can_fenxing = pre_kdata[[
                                    "low", "high"
                                ]]
                                zen_state.can_fenxing_index = pre_index
                    else:
                        zen_state.can_fenxing = pre_kdata[["low", "high"]]
                        zen_state.can_fenxing_index = pre_index

                # fenxing confirmed
                if zen_state.can_fenxing is not None:
                    if zen_state.opposite_count >= 4 or (
                            index - zen_state.can_fenxing_index >= 8):
                        acc_df.loc[zen_state.can_fenxing_index,
                                   fenxing_col] = True

                        # record the value of the bi
                        if fenxing_col == "bi_ding":
                            bi_value = acc_df.loc[zen_state.can_fenxing_index,
                                                  "high"]
                        else:
                            bi_value = acc_df.loc[zen_state.can_fenxing_index,
                                                  "low"]
                        acc_df.loc[zen_state.can_fenxing_index,
                                   "bi_value"] = bi_value

                        # compute the slope of the bi
                        if zen_state.pre_bi:
                            change = (bi_value - zen_state.pre_bi[1]
                                      ) / zen_state.pre_bi[1]
                            interval = zen_state.can_fenxing_index - zen_state.pre_bi[
                                0]
                            bi_slope = change / interval
                            acc_df.loc[zen_state.can_fenxing_index,
                                       "bi_change"] = change
                            acc_df.loc[zen_state.can_fenxing_index,
                                       "bi_slope"] = bi_slope
                            acc_df.loc[zen_state.can_fenxing_index,
                                       "bi_interval"] = interval

                        # record the bi used for computing the bi zhongshu
                        zen_state.bis.append((
                            acc_df.loc[zen_state.can_fenxing_index,
                                       "timestamp"],
                            bi_value,
                            zen_state.can_fenxing_index,
                        ))

                        # compute the bi zhongshu; as of now this zhongshu is confirmed and immutable,
                        # but the marked point lies in the past: note that in backtesting the most
                        # recent zhongshu may look into the future, only the previous one is truly known;
                        # hence current_zhongshu_y0/current_zhongshu_y1 are recorded for direct use
                        end_index = zen_state.can_fenxing_index

                        (
                            zen_state.bis,
                            current_zhongshu,
                            current_zhongshu_change,
                            current_zhongshu_interval,
                        ) = handle_zhongshu(
                            points=zen_state.bis,
                            acc_df=acc_df,
                            end_index=end_index,
                            zhongshu_col="bi_zhongshu",
                            zhongshu_change_col="bi_zhongshu_change",
                        )

                        zen_state.pre_bi = (zen_state.can_fenxing_index,
                                            bi_value)

                        zen_state.opposite_count = 0
                        zen_state.direction = zen_state.direction.opposite()
                        zen_state.can_fenxing = None

                        # determine the first duan
                        if zen_state.fenxing_list is not None:
                            zen_state.fenxing_list.append(
                                Fenxing(
                                    state=fenxing_col,
                                    kdata={
                                        "low":
                                        float(acc_df.loc[
                                            zen_state.can_fenxing_index]
                                              ["low"]),
                                        "high":
                                        float(acc_df.loc[
                                            zen_state.can_fenxing_index]
                                              ["high"]),
                                    },
                                    index=zen_state.can_fenxing_index,
                                ))

                            if len(zen_state.fenxing_list) == 4:
                                duan_state = handle_duan(
                                    fenxing_list=zen_state.fenxing_list,
                                    pre_duan_state=zen_state.current_duan_state
                                )

                                change = duan_state != zen_state.current_duan_state

                                if change:
                                    zen_state.current_duan_state = duan_state

                                    # confirm the state
                                    acc_df.loc[
                                        zen_state.fenxing_list[0].
                                        index:zen_state.fenxing_list[-1].index,
                                        "duan_state"] = zen_state.current_duan_state

                                    duan_index = zen_state.fenxing_list[
                                        0].index
                                    if zen_state.current_duan_state == "up":
                                        acc_df.loc[duan_index,
                                                   "duan_di"] = True
                                        duan_value = acc_df.loc[duan_index,
                                                                "low"]
                                    else:
                                        duan_index = zen_state.fenxing_list[
                                            0].index
                                        acc_df.loc[duan_index,
                                                   "duan_ding"] = True
                                        duan_value = acc_df.loc[duan_index,
                                                                "high"]
                                    # record the value of the duan
                                    acc_df.loc[duan_index,
                                               "duan_value"] = duan_value

                                    # compute the slope of the duan
                                    if zen_state.pre_duan:
                                        change = (duan_value -
                                                  zen_state.pre_duan[1]
                                                  ) / zen_state.pre_duan[1]
                                        interval = duan_index - zen_state.pre_duan[
                                            0]
                                        duan_slope = change / interval
                                        acc_df.loc[duan_index,
                                                   "duan_change"] = change
                                        acc_df.loc[duan_index,
                                                   "duan_slope"] = duan_slope
                                        acc_df.loc[duan_index,
                                                   "duan_interval"] = interval

                                    zen_state.pre_duan = (duan_index,
                                                          duan_value)

                                    # record the duan used for computing the zhongshu
                                    zen_state.duans.append(
                                        (acc_df.loc[duan_index, "timestamp"],
                                         duan_value, duan_index))

                                    # compute the zhongshu
                                    zen_state.duans, _, _, _ = handle_zhongshu(
                                        points=zen_state.duans,
                                        acc_df=acc_df,
                                        end_index=duan_index,
                                        zhongshu_col="zhongshu",
                                        zhongshu_change_col="zhongshu_change",
                                    )

                                    # keep only the last fenxing
                                    zen_state.fenxing_list = zen_state.fenxing_list[
                                        -1:]
                                else:
                                    # keep the previous state and evict the first candidate
                                    acc_df.loc[
                                        zen_state.fenxing_list[0].index,
                                        "duan_state"] = zen_state.current_duan_state
                                    zen_state.fenxing_list = zen_state.fenxing_list[
                                        1:]

            pre_kdata = kdata
            pre_index = index

        acc_df = acc_df.set_index("timestamp", drop=False)
        return acc_df, zen_state
Example #19
    def transform_one(self, entity_id, df: pd.DataFrame) -> pd.DataFrame:
        # record duan intervals
        if entity_id not in self.entity_duan_intervals:
            self.entity_duan_intervals[entity_id] = []

        df = df.reset_index(drop=True)
        # bottom of a bi (stroke)
        df['bi_di'] = False
        # top of a bi (stroke)
        df['bi_ding'] = False

        # temporary fenxing markers, never changed afterwards
        df['tmp_ding'] = False
        df['tmp_di'] = False

        df['duan_state'] = 'yi'

        # bottom of a duan (segment)
        df['duan_di'] = False
        # top of a duan (segment)
        df['duan_ding'] = False

        fenxing_list: List[Fenxing] = []

        # take the first 11 k-lines: at most one top fenxing plus one bottom fenxing can appear
        # note: this is just a convenient way to pin down the first fenxing; once it exists,
        # the subsequent processing is uniform
        # start_index is the position where the iteration starts
        # direction is the direction after a confirmed fenxing: down after a top fenxing,
        # up after a bottom fenxing
        fenxing, start_index, direction = handle_first_fenxing(df, step=11)
        fenxing_list.append(fenxing)
        # temporary direction
        tmp_direction = direction
        # candidate fenxing
        can_fenxing = None
        can_fenxing_index = None
        # count in the current direction
        count = 0
        # count in the opposite direction
        opposite_count = 0
        # state of the current duan
        current_duan_state = 'yi'

        pre_kdata = df.iloc[start_index - 1]
        pre_index = start_index - 1
        for index, kdata in df.iloc[start_index:].iterrows():
            # print(f'timestamp: {kdata.timestamp}')
            # temporary direction
            tmp_direction = get_direction(kdata,
                                          pre_kdata,
                                          current=tmp_direction)

            # handle the inclusion relation (merge contained k-lines)
            handle_including(one_df=df,
                             index=index,
                             kdata=kdata,
                             pre_index=pre_index,
                             pre_kdata=pre_kdata,
                             tmp_direction=tmp_direction)

            # based on the direction, look for the matching fenxing and duan
            if direction == Direction.up:
                tmp_fenxing_col = 'tmp_ding'
                fenxing_col = 'bi_ding'
            else:
                tmp_fenxing_col = 'tmp_di'
                fenxing_col = 'bi_di'

            # same direction: the move is continuing
            if tmp_direction == direction:
                opposite_count = 0
            # opposite direction: look for the reversal fenxing
            else:
                opposite_count = opposite_count + 1
                # first bar in the opposite direction
                if opposite_count == 1:
                    df.loc[pre_index, tmp_fenxing_col] = True

                    if pd_is_not_null(can_fenxing):
                        # candidate bottom fenxing
                        if tmp_direction == Direction.up:
                            # take the lower one
                            if pre_kdata['low'] <= can_fenxing['low']:
                                can_fenxing = pre_kdata
                                can_fenxing_index = pre_index

                        # candidate top fenxing
                        else:
                            # take the higher one
                            if pre_kdata['high'] >= can_fenxing['high']:
                                can_fenxing = pre_kdata
                                can_fenxing_index = pre_index
                    else:
                        can_fenxing = pre_kdata
                        can_fenxing_index = pre_index

                # fenxing confirmed
                if pd_is_not_null(can_fenxing):
                    if opposite_count >= 4 or (index - can_fenxing_index >= 8):
                        df.loc[can_fenxing_index, fenxing_col] = True
                        opposite_count = 0
                        direction = direction.opposite()
                        can_fenxing = None

                        # determine the first duan
                        if fenxing_list is not None:
                            fenxing_list.append(
                                Fenxing(state=fenxing_col,
                                        kdata=df.loc[can_fenxing_index],
                                        index=can_fenxing_index))

                            if len(fenxing_list) == 4:
                                duan_state = handle_duan(
                                    fenxing_list=fenxing_list,
                                    pre_duan_state=current_duan_state)

                                change = duan_state != current_duan_state

                                if change:
                                    current_duan_state = duan_state

                                    # confirm the state
                                    df.loc[fenxing_list[0].
                                           index:fenxing_list[-1].index,
                                           'duan_state'] = current_duan_state

                                    if current_duan_state == 'up':
                                        df.loc[fenxing_list[0].index,
                                               'duan_di'] = True
                                    else:
                                        df.loc[fenxing_list[0].index,
                                               'duan_ding'] = True
                                    # keep only the last fenxing
                                    fenxing_list = fenxing_list[-1:]
                                else:
                                    # keep the previous state and evict the first candidate
                                    df.loc[fenxing_list[0].index,
                                           'duan_state'] = current_duan_state
                                    fenxing_list = fenxing_list[1:]

            pre_kdata = kdata
            pre_index = index

        return df
Example #20
    def record(self, entity, start, end, size, timestamps):
        all_df = None
        # Shanghai Composite Index
        if entity.code == "000001":
            all_df = StockMoneyFlow.query_data(
                provider=self.provider, start_timestamp=start, filters=[StockMoneyFlow.entity_id.like("stock_sh%")]
            )
        # Shenzhen Component Index
        elif entity.code == "399001":
            all_df = StockMoneyFlow.query_data(
                provider=self.provider, start_timestamp=start, filters=[StockMoneyFlow.entity_id.like("stock_sz%")]
            )
        # ChiNext Index
        elif entity.code == "399006":
            all_df = StockMoneyFlow.query_data(
                provider=self.provider, start_timestamp=start, filters=[StockMoneyFlow.code.like("300%")]
            )
        # STAR Market
        elif entity.code == "000688":
            all_df = StockMoneyFlow.query_data(
                provider=self.provider, start_timestamp=start, filters=[StockMoneyFlow.code.like("688%")]
            )

        if pd_is_not_null(all_df):
            g = all_df.groupby("timestamp")
            for timestamp, df in g:
                se = pd.Series(
                    {
                        "id": "{}_{}".format(entity.id, to_time_str(timestamp)),
                        "entity_id": entity.id,
                        "timestamp": timestamp,
                        "code": entity.code,
                        "name": entity.name,
                    }
                )
                for col in [
                    "net_main_inflows",
                    "net_huge_inflows",
                    "net_big_inflows",
                    "net_medium_inflows",
                    "net_small_inflows",
                ]:
                    se[col] = df[col].sum()

                for col in [
                    "net_main_inflow_rate",
                    "net_huge_inflow_rate",
                    "net_big_inflow_rate",
                    "net_medium_inflow_rate",
                    "net_small_inflow_rate",
                ]:
                    se[col] = df[col].sum() / len(df)

                index_df = se.to_frame().T

                self.logger.info(index_df)

                df_to_db(
                    df=index_df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update
                )

        return None
Example #21
    def acc_one(self, entity_id, df: pd.DataFrame, acc_df: pd.DataFrame,
                state: dict) -> (pd.DataFrame, dict):
        self.logger.info(f'acc_one:{entity_id}')
        if pd_is_not_null(acc_df):
            df = df[df.index > acc_df.index[-1]]
            if pd_is_not_null(df):
                self.logger.info(f'compute from {df.iloc[0]["timestamp"]}')
                # start position for the iteration below
                start_index = len(acc_df)

                acc_df = pd.concat([acc_df, df])

                zen_state = state

                acc_df = acc_df.reset_index(drop=True)
            else:
                self.logger.info('no need to compute')
                return acc_df, state
        else:
            acc_df = df
            # bottom of a bi (stroke)
            acc_df['bi_di'] = False
            # top of a bi (stroke)
            acc_df['bi_ding'] = False
            # value at a bi top/bottom fenxing: low for bi_di, high for bi_ding, otherwise None;
            # when drawing, connecting the non-null values yields the bi
            acc_df['bi_value'] = np.NAN

            # temporary fenxing markers, never changed afterwards
            acc_df['tmp_ding'] = False
            acc_df['tmp_di'] = False
            # strength of the fenxing
            acc_df['fenxing_power'] = np.NAN

            acc_df['duan_state'] = 'yi'

            # bottom of a duan (segment)
            acc_df['duan_di'] = False
            # top of a duan (segment)
            acc_df['duan_ding'] = False
            # value at a duan top/bottom: low for duan_di, high for duan_ding, otherwise None;
            # when drawing, connecting the non-null values yields the duan
            acc_df['duan_value'] = np.NAN

            # recorded at the end point x1 of the last duan that confirms the zhongshu;
            # the value is a Rect(x0,y0,x1,y1)
            acc_df['zhongshu'] = np.NAN

            acc_df = acc_df.reset_index(drop=True)

            zen_state = ZenState(
                dict(fenxing_list=[],
                     direction=None,
                     can_fenxing=None,
                     can_fenxing_index=None,
                     opposite_count=0,
                     current_duan_state='yi',
                     duans=[],
                     pre_bi=None,
                     pre_duan=None))

            zen_state.fenxing_list: List[Fenxing] = []

            # take the first 11 k-lines: at most one top fenxing plus one bottom fenxing can appear
            # note: this is just a convenient way to pin down the first fenxing; once it exists,
            # the subsequent processing is uniform
            # start_index is the position where the iteration starts
            # direction is the direction after a confirmed fenxing: down after a top fenxing,
            # up after a bottom fenxing
            fenxing, start_index, direction = handle_first_fenxing(acc_df,
                                                                   step=11)
            if not fenxing:
                return None, None

            zen_state.fenxing_list.append(fenxing)
            zen_state.direction = direction

            # list of (timestamp,value)
            zen_state.duans = []

        pre_kdata = acc_df.iloc[start_index - 1]
        pre_index = start_index - 1

        tmp_direction = zen_state.direction

        for index, kdata in acc_df.iloc[start_index:].iterrows():
            # print(f'timestamp: {kdata.timestamp}')
            # temporary direction
            tmp_direction = get_direction(kdata,
                                          pre_kdata,
                                          current=tmp_direction)

            # handle the inclusion relation (merge contained k-lines)
            handle_including(one_df=acc_df,
                             index=index,
                             kdata=kdata,
                             pre_index=pre_index,
                             pre_kdata=pre_kdata,
                             tmp_direction=tmp_direction)

            # based on the direction, look for the matching fenxing and duan
            if zen_state.direction == Direction.up:
                tmp_fenxing_col = 'tmp_ding'
                fenxing_col = 'bi_ding'
            else:
                tmp_fenxing_col = 'tmp_di'
                fenxing_col = 'bi_di'

            # same direction: the move is continuing
            if tmp_direction == zen_state.direction:
                zen_state.opposite_count = 0
            # opposite direction: look for the reversal fenxing
            else:
                zen_state.opposite_count = zen_state.opposite_count + 1
                # first bar in the opposite direction
                if zen_state.opposite_count == 1:
                    acc_df.loc[pre_index, tmp_fenxing_col] = True
                    acc_df.loc[pre_index, 'fenxing_power'] = fenxing_power(
                        acc_df.loc[pre_index - 1],
                        pre_kdata,
                        kdata,
                        fenxing=tmp_fenxing_col)

                    if pd_is_not_null(zen_state.can_fenxing):
                        # candidate bottom fenxing
                        if tmp_direction == Direction.up:
                            # take the lower one
                            if pre_kdata['low'] <= zen_state.can_fenxing['low']:
                                zen_state.can_fenxing = pre_kdata
                                zen_state.can_fenxing_index = pre_index

                        # candidate top fenxing
                        else:
                            # take the higher one
                            if pre_kdata['high'] >= zen_state.can_fenxing[
                                    'high']:
                                zen_state.can_fenxing = pre_kdata
                                zen_state.can_fenxing_index = pre_index
                    else:
                        zen_state.can_fenxing = pre_kdata
                        zen_state.can_fenxing_index = pre_index

                # fenxing confirmed
                if pd_is_not_null(zen_state.can_fenxing):
                    if zen_state.opposite_count >= 4 or (
                            index - zen_state.can_fenxing_index >= 8):
                        acc_df.loc[zen_state.can_fenxing_index,
                                   fenxing_col] = True

                        # record the value of the bi
                        if fenxing_col == 'bi_ding':
                            bi_value = acc_df.loc[zen_state.can_fenxing_index,
                                                  'high']
                        else:
                            bi_value = acc_df.loc[zen_state.can_fenxing_index,
                                                  'low']
                        acc_df.loc[zen_state.can_fenxing_index,
                                   'bi_value'] = bi_value

                        zen_state.pre_bi = (zen_state.can_fenxing_index,
                                            bi_value)

                        zen_state.opposite_count = 0
                        zen_state.direction = zen_state.direction.opposite()
                        zen_state.can_fenxing = None

                        # determine the first duan
                        if zen_state.fenxing_list is not None:
                            zen_state.fenxing_list.append(
                                Fenxing(state=fenxing_col,
                                        kdata=acc_df.loc[
                                            zen_state.can_fenxing_index,
                                            ['open', 'close', 'high', 'low']],
                                        index=zen_state.can_fenxing_index))

                            if len(zen_state.fenxing_list) == 4:
                                duan_state = handle_duan(
                                    fenxing_list=zen_state.fenxing_list,
                                    pre_duan_state=zen_state.current_duan_state
                                )

                                change = duan_state != zen_state.current_duan_state

                                if change:
                                    zen_state.current_duan_state = duan_state

                                    # confirm the state
                                    acc_df.loc[
                                        zen_state.fenxing_list[0].
                                        index:zen_state.fenxing_list[-1].index,
                                        'duan_state'] = zen_state.current_duan_state

                                    duan_index = zen_state.fenxing_list[
                                        0].index
                                    if zen_state.current_duan_state == 'up':
                                        acc_df.loc[duan_index,
                                                   'duan_di'] = True
                                        duan_value = acc_df.loc[duan_index,
                                                                'low']
                                    else:
                                        duan_index = zen_state.fenxing_list[
                                            0].index
                                        acc_df.loc[duan_index,
                                                   'duan_ding'] = True
                                        duan_value = acc_df.loc[duan_index,
                                                                'high']
                                    # record the value of the duan
                                    acc_df.loc[duan_index,
                                               'duan_value'] = duan_value

                                    # record the duan used for computing the zhongshu
                                    zen_state.duans.append(
                                        (acc_df.loc[duan_index,
                                                    'timestamp'], duan_value))

                                    # compute the zhongshu
                                    if len(zen_state.duans) == 4:
                                        x1 = zen_state.duans[0][0]
                                        x2 = zen_state.duans[3][0]
                                        if zen_state.duans[0][
                                                1] < zen_state.duans[1][1]:
                                            # downward duan
                                            range = intersect(
                                                (zen_state.duans[0][1],
                                                 zen_state.duans[1][1]),
                                                (zen_state.duans[2][1],
                                                 zen_state.duans[3][1]))
                                            if range:
                                                y1, y2 = range
                                                # 记录中枢
                                                acc_df.loc[duan_index,
                                                           'zhongshu'] = Rect(
                                                               x0=x1,
                                                               x1=x2,
                                                               y0=y1,
                                                               y1=y2)
                                                zen_state.duans = zen_state.duans[
                                                    -1:]
                                            else:
                                                zen_state.duans = zen_state.duans[
                                                    1:]
                                        else:
                                            # upward duan
                                            overlap = intersect(
                                                (zen_state.duans[1][1],
                                                 zen_state.duans[0][1]),
                                                (zen_state.duans[3][1],
                                                 zen_state.duans[2][1]))
                                            if overlap:
                                                y1, y2 = overlap
                                                # record the zhongshu
                                                acc_df.loc[duan_index,
                                                           'zhongshu'] = Rect(
                                                               x0=x1,
                                                               x1=x2,
                                                               y0=y1,
                                                               y1=y2)
                                                zen_state.duans = zen_state.duans[
                                                    -1:]
                                            else:
                                                zen_state.duans = zen_state.duans[
                                                    1:]

                                    # keep only the last one
                                    zen_state.fenxing_list = zen_state.fenxing_list[
                                        -1:]
                                else:
                                    # keep the previous state and drop the first candidate
                                    acc_df.loc[
                                        zen_state.fenxing_list[0].index,
                                        'duan_state'] = zen_state.current_duan_state
                                    zen_state.fenxing_list = zen_state.fenxing_list[
                                        1:]

            pre_kdata = kdata
            pre_index = index

        acc_df = acc_df.set_index('timestamp', drop=False)
        return acc_df, zen_state
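
The intersect helper above computes the overlap of two duan value ranges; four consecutive duans whose ranges overlap define the zhongshu recorded as a Rect. A minimal sketch of such a helper, assuming the ranges are passed as (low, high) tuples (the actual zvt implementation may differ in edge handling):

def intersect(range_a, range_b):
    # range_a, range_b: (low, high) value intervals of two duans
    low = max(range_a[0], range_b[0])
    high = min(range_a[1], range_b[1])
    # return the overlapping interval, or None when the intervals are disjoint
    if low < high:
        return low, high
    return None
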
Beispiel #22
0
    def record(self, entity, start, end, size, timestamps):
        if not self.end_timestamp:
            df = get_money_flow(code=to_jq_entity_id(entity),
                                date=to_time_str(start))
        else:
            df = get_money_flow(code=to_jq_entity_id(entity),
                                date=start,
                                end_date=to_time_str(self.end_timestamp))

        df = df.dropna()

        if pd_is_not_null(df):
            df["name"] = entity.name
            df.rename(
                columns={
                    "date": "timestamp",
                    "net_amount_main": "net_main_inflows",
                    "net_pct_main": "net_main_inflow_rate",
                    "net_amount_xl": "net_huge_inflows",
                    "net_pct_xl": "net_huge_inflow_rate",
                    "net_amount_l": "net_big_inflows",
                    "net_pct_l": "net_big_inflow_rate",
                    "net_amount_m": "net_medium_inflows",
                    "net_pct_m": "net_medium_inflow_rate",
                    "net_amount_s": "net_small_inflows",
                    "net_pct_s": "net_small_inflow_rate",
                },
                inplace=True,
            )

            # convert to standard float
            inflows_cols = [
                "net_main_inflows",
                "net_huge_inflows",
                "net_big_inflows",
                "net_medium_inflows",
                "net_small_inflows",
            ]
            for col in inflows_cols:
                df[col] = pd.to_numeric(df[col], errors="coerce")
            df = df.dropna()

            if not pd_is_not_null(df):
                return None

            df[inflows_cols] = df[inflows_cols].apply(lambda x: x * 10000)

            inflow_rate_cols = [
                "net_main_inflow_rate",
                "net_huge_inflow_rate",
                "net_big_inflow_rate",
                "net_medium_inflow_rate",
                "net_small_inflow_rate",
            ]
            for col in inflow_rate_cols:
                df[col] = pd.to_numeric(df[col], errors="coerce")
            df = df.dropna()
            if not pd_is_not_null(df):
                return None

            df[inflow_rate_cols] = df[inflow_rate_cols].apply(
                lambda x: x / 100)

            # compute the total net inflow
            df["net_inflows"] = (df["net_huge_inflows"] +
                                 df["net_big_inflows"] +
                                 df["net_medium_inflows"] +
                                 df["net_small_inflows"])
            # compute the total net inflow rate: back the turnover amount out
            # of the main inflow and its rate, then divide
            amount = df["net_main_inflows"] / df["net_main_inflow_rate"]
            df["net_inflow_rate"] = df["net_inflows"] / amount

            df["entity_id"] = entity.id
            df["timestamp"] = pd.to_datetime(df["timestamp"])
            df["provider"] = "joinquant"
            df["code"] = entity.code

            def generate_kdata_id(se):
                return "{}_{}".format(
                    se["entity_id"],
                    to_time_str(se["timestamp"], fmt=TIME_FORMAT_DAY))

            df["id"] = df[["entity_id", "timestamp"]].apply(generate_kdata_id,
                                                            axis=1)

            df = df.drop_duplicates(subset="id", keep="last")

            df_to_db(df=df,
                     data_schema=self.data_schema,
                     provider=self.provider,
                     force_update=self.force_update)

        return None
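
Two unit conventions are normalized above: judging by the conversions in the snippet, the raw amounts arrive in units of 10,000 CNY (hence the * 10000) and the rates arrive as percentages (hence the / 100). The total inflow rate is then derived by backing the turnover out of the main-inflow pair. A toy check of that arithmetic, with made-up numbers:

# hypothetical figures after the unit conversions above
net_main_inflows = 5_000_000.0     # CNY
net_main_inflow_rate = 0.02        # 2% of turnover
amount = net_main_inflows / net_main_inflow_rate  # implied turnover: 250,000,000 CNY
net_inflows = 3_000_000.0          # sum of the four size buckets
net_inflow_rate = net_inflows / amount            # 0.012, i.e. 1.2%
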
Beispiel #23
0
def get_top_entities(
    data_schema: Mixin,
    column: str,
    start_timestamp=None,
    end_timestamp=None,
    pct=0.1,
    method: WindowMethod = WindowMethod.change,
    return_type: TopType = None,
    kdata_filters=None,
    show_name=False,
    data_provider=None,
):
    """
    get top entities in specific domain between time range

    :param data_schema: schema in domain
    :param column: schema column
    :param start_timestamp:
    :param end_timestamp:
    :param pct: range (0,1]
    :param method:
    :param return_type:
    :param entity_filters:
    :param kdata_filters:
    :param show_name: show entity name
    :return:
    """
    if isinstance(method, str):
        method = WindowMethod(method)

    if isinstance(return_type, str):
        return_type = TopType(return_type)

    if show_name:
        columns = ["entity_id", column, "name"]
    else:
        columns = ["entity_id", column]

    all_df = data_schema.query_data(
        start_timestamp=start_timestamp,
        end_timestamp=end_timestamp,
        columns=columns,
        filters=kdata_filters,
        provider=data_provider,
    )
    if not pd_is_not_null(all_df):
        return None, None
    g = all_df.groupby("entity_id")
    tops = {}
    names = {}
    for entity_id, df in g:
        if method == WindowMethod.change:
            start = df[column].iloc[0]
            end = df[column].iloc[-1]
            if start != 0:
                change = (end - start) / abs(start)
            else:
                change = 0
            tops[entity_id] = change
        elif method == WindowMethod.avg:
            tops[entity_id] = df[column].mean()
        elif method == WindowMethod.sum:
            tops[entity_id] = df[column].sum()

        if show_name:
            names[entity_id] = df["name"].iloc[0]

    positive_df = None
    negative_df = None
    top_index = int(len(tops) * pct)
    if return_type is None or return_type == TopType.positive:
        # from big to small
        positive_tops = {
            k: v
            for k, v in sorted(
                tops.items(), key=lambda item: item[1], reverse=True)
        }
        positive_tops = dict(itertools.islice(positive_tops.items(),
                                              top_index))
        positive_df = pd.DataFrame.from_dict(positive_tops, orient="index")

        col = "score"
        positive_df.columns = [col]
        positive_df.sort_values(by=col, ascending=False)
    if return_type is None or return_type == TopType.negative:
        # from small to big
        negative_tops = {
            k: v
            for k, v in sorted(tops.items(), key=lambda item: item[1])
        }
        negative_tops = dict(itertools.islice(negative_tops.items(),
                                              top_index))
        negative_df = pd.DataFrame.from_dict(negative_tops, orient="index")

        col = "score"
        negative_df.columns = [col]
        negative_df.sort_values(by=col)

    if names:
        if pd_is_not_null(positive_df):
            positive_df["name"] = positive_df.index.map(lambda x: names[x])
        if pd_is_not_null(negative_df):
            negative_df["name"] = negative_df.index.map(lambda x: names[x])
    return positive_df, negative_df
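
A hypothetical call, assuming zvt's daily kdata schema Stock1dKdata and the enums are importable (the names here are illustrative, not prescriptive):

# top/bottom 10% of entities by close-price change over the window
positive_df, negative_df = get_top_entities(
    data_schema=Stock1dKdata,       # assumed daily kdata schema
    column="close",
    start_timestamp="2021-01-01",
    end_timestamp="2021-06-30",
    pct=0.1,
    method=WindowMethod.change,
    show_name=True,
)
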
Beispiel #25
0
    def transform_one(self, entity_id, df: pd.DataFrame) -> pd.DataFrame:
        # record duan intervals
        if entity_id not in self.entity_duan_intervals:
            self.entity_duan_intervals[entity_id] = []

        df = df.reset_index(drop=True)
        # bottom of a bi (stroke)
        df['bi_di'] = False
        # top of a bi
        df['bi_ding'] = False

        # temporary fenxing (fractal) markers, immutable once set
        df['tmp_ding'] = False
        df['tmp_di'] = False

        df['duan_state'] = 'yi'

        # bottom of a duan (segment)
        df['duan_di'] = False
        # top of a duan
        df['duan_ding'] = False
        # duan top/bottom value: low at a duan_di, high at a duan_ding, else NaN;
        # connecting the non-null values when plotting yields the duan line
        df['duan_value'] = np.nan

        # set at the end point x1 of the last duan confirming a zhongshu,
        # value is Rect(x0, y0, x1, y1)
        df['zhongshu'] = None

        fenxing_list: List[Fenxing] = []

        # take the first 11 k-lines: at most one top fenxing plus one bottom
        # fenxing can appear there
        # note: this is just a convenient way to pin down the first fenxing;
        # once we have it, the subsequent processing is uniform
        # start_index is where the iteration starts
        # direction is the direction after a confirmed fenxing: down after a
        # top fenxing, up after a bottom fenxing
        fenxing, start_index, direction = handle_first_fenxing(df, step=11)
        fenxing_list.append(fenxing)
        # temporary direction
        tmp_direction = direction
        # candidate fenxing
        can_fenxing = None
        can_fenxing_index = None
        # count in the current direction
        count = 0
        # count in the opposite direction
        opposite_count = 0
        # state of the current duan
        current_duan_state = 'yi'

        pre_kdata = df.iloc[start_index - 1]
        pre_index = start_index - 1

        # list of (timestamp,value)
        duans = []

        for index, kdata in df.iloc[start_index:].iterrows():
            # print(f'timestamp: {kdata.timestamp}')
            # temporary direction
            tmp_direction = get_direction(kdata,
                                          pre_kdata,
                                          current=tmp_direction)

            # handle the inclusion (containing) relationship between k-lines
            handle_including(one_df=df,
                             index=index,
                             kdata=kdata,
                             pre_index=pre_index,
                             pre_kdata=pre_kdata,
                             tmp_direction=tmp_direction)

            # based on the direction, look for the matching fenxing and duan
            if direction == Direction.up:
                tmp_fenxing_col = 'tmp_ding'
                fenxing_col = 'bi_ding'
            else:
                tmp_fenxing_col = 'tmp_di'
                fenxing_col = 'bi_di'

            # same direction, the move is continuing
            if tmp_direction == direction:
                opposite_count = 0
            # reversed, look for the opposite fenxing
            else:
                opposite_count = opposite_count + 1
                # first reversal
                if opposite_count == 1:
                    df.loc[pre_index, tmp_fenxing_col] = True

                    if pd_is_not_null(can_fenxing):
                        # candidate bottom fenxing
                        if tmp_direction == Direction.up:
                            # take the smaller one
                            if pre_kdata['low'] <= can_fenxing['low']:
                                can_fenxing = pre_kdata
                                can_fenxing_index = pre_index

                        # candidate top fenxing
                        else:
                            # take the larger one
                            if pre_kdata['high'] >= can_fenxing['high']:
                                can_fenxing = pre_kdata
                                can_fenxing_index = pre_index
                    else:
                        can_fenxing = pre_kdata
                        can_fenxing_index = pre_index

                # fenxing confirmed
                if pd_is_not_null(can_fenxing):
                    if opposite_count >= 4 or (index - can_fenxing_index >= 8):
                        df.loc[can_fenxing_index, fenxing_col] = True

                        # record the bi value
                        if fenxing_col == 'bi_ding':
                            df.loc[can_fenxing_index,
                                   'bi_value'] = df.loc[can_fenxing_index,
                                                        'high']
                        else:
                            df.loc[can_fenxing_index,
                                   'bi_value'] = df.loc[can_fenxing_index,
                                                        'low']

                        opposite_count = 0
                        direction = direction.opposite()
                        can_fenxing = None

                        # determine the first duan
                        if fenxing_list is not None:
                            fenxing_list.append(
                                Fenxing(state=fenxing_col,
                                        kdata=df.loc[can_fenxing_index],
                                        index=can_fenxing_index))

                            if len(fenxing_list) == 4:
                                duan_state = handle_duan(
                                    fenxing_list=fenxing_list,
                                    pre_duan_state=current_duan_state)

                                change = duan_state != current_duan_state

                                if change:
                                    current_duan_state = duan_state

                                    # mark the duan state
                                    df.loc[fenxing_list[0].
                                           index:fenxing_list[-1].index,
                                           'duan_state'] = current_duan_state

                                    duan_index = fenxing_list[0].index
                                    if current_duan_state == 'up':
                                        df.loc[duan_index, 'duan_di'] = True
                                        duan_value = df.loc[duan_index, 'low']
                                    else:
                                        df.loc[duan_index, 'duan_ding'] = True
                                        duan_value = df.loc[duan_index, 'high']
                                    # record the duan value
                                    df.loc[duan_index,
                                           'duan_value'] = duan_value
                                    # record duans used to compute the zhongshu
                                    duans.append(
                                        (df.loc[duan_index,
                                                'timestamp'], duan_value))

                                    # compute the zhongshu
                                    if len(duans) == 4:
                                        x1 = duans[0][0]
                                        x2 = duans[3][0]
                                        if duans[0][1] < duans[1][1]:
                                            # downward duan
                                            overlap = intersect(
                                                (duans[0][1], duans[1][1]),
                                                (duans[2][1], duans[3][1]))
                                            if overlap:
                                                y1, y2 = overlap
                                                # record the zhongshu
                                                df.loc[duan_index,
                                                       'zhongshu'] = Rect(
                                                           x0=x1,
                                                           x1=x2,
                                                           y0=y1,
                                                           y1=y2)
                                                duans = duans[-1:]
                                            else:
                                                duans = duans[1:]
                                        else:
                                            # upward duan
                                            overlap = intersect(
                                                (duans[1][1], duans[0][1]),
                                                (duans[3][1], duans[2][1]))
                                            if overlap:
                                                y1, y2 = overlap
                                                # record the zhongshu
                                                df.loc[duan_index,
                                                       'zhongshu'] = Rect(
                                                           x0=x1,
                                                           x1=x2,
                                                           y0=y1,
                                                           y1=y2)
                                                duans = duans[-1:]
                                            else:
                                                duans = duans[1:]

                                    # keep only the last one
                                    fenxing_list = fenxing_list[-1:]
                                else:
                                    # keep the previous state and drop the first candidate
                                    df.loc[fenxing_list[0].index,
                                           'duan_state'] = current_duan_state
                                    fenxing_list = fenxing_list[1:]

            pre_kdata = kdata
            pre_index = index

        df = df.set_index('timestamp')
        return df
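
The get_direction helper drives the whole loop; a plausible sketch of its contract, assuming a higher high plus higher low means up, a lower high plus lower low means down, and anything else (an inclusion relationship) keeps the current temporary direction. The actual zvt implementation may differ in detail:

def get_direction(kdata, pre_kdata, current):
    # higher high and higher low: up move
    if kdata['high'] > pre_kdata['high'] and kdata['low'] > pre_kdata['low']:
        return Direction.up
    # lower high and lower low: down move
    if kdata['high'] < pre_kdata['high'] and kdata['low'] < pre_kdata['low']:
        return Direction.down
    # otherwise (inclusion), keep the current temporary direction
    return current
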
Beispiel #26
0
    def on_data_loaded(self, data: pd.DataFrame):
        if pd_is_not_null(self.factor_df):
            self.factor_df['zhongshu'] = self.factor_df['zhongshu'].apply(
                lambda x: json.loads(x, object_hook=decode_rect))
        return super().on_data_loaded(data)
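
decode_rect is the json object_hook that turns the persisted dict back into a Rect. A minimal sketch, assuming Rect takes x0/x1/y0/y1 keyword arguments as in the factors above (the zvt version may differ):

def decode_rect(dct):
    # rebuild a Rect from its serialized form; leave other dicts untouched
    # so json.loads can keep decoding nested values
    if {'x0', 'x1', 'y0', 'y1'} <= dct.keys():
        return Rect(x0=dct['x0'], x1=dct['x1'], y0=dct['y0'], y1=dct['y1'])
    return dct
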