def on_time(self, timestamp):
    """Pick long/short targets from manager trading data at ``timestamp``.

    Longs the top-10 entities whose managers increased holdings by more than
    5000 shares, shorts the top-10 whose managers reduced by more than 5000.
    """
    # managers bought > 5000 shares (增持5000股以上)
    long_df = ManagerTrading.query_data(
        start_timestamp=timestamp,
        end_timestamp=timestamp,
        filters=[ManagerTrading.volume > 5000],
        columns=[ManagerTrading.entity_id],
        order=ManagerTrading.volume.desc(),
        limit=10)
    # managers sold > 5000 shares (减持5000股以上)
    short_df = ManagerTrading.query_data(
        start_timestamp=timestamp,
        end_timestamp=timestamp,
        filters=[ManagerTrading.volume < -5000],
        columns=[ManagerTrading.entity_id],
        order=ManagerTrading.volume.asc(),
        limit=10)
    # query_data may return None/empty for one side only; previously the
    # `or` guard still dereferenced BOTH frames, so a None on either side
    # raised inside the try and silently skipped the other side's trades.
    long_selected = set(long_df['entity_id'].to_list()) if pd_is_not_null(long_df) else set()
    short_selected = set(short_df['entity_id'].to_list()) if pd_is_not_null(short_df) else set()
    if long_selected or short_selected:
        try:
            self.trade_the_targets(
                due_timestamp=timestamp,
                happen_timestamp=timestamp,
                long_selected=long_selected,
                short_selected=short_selected)
        except Exception as e:
            self.logger.error(e)
def generate_targets(self):
    """Split filter/score results into keep, open-long and open-short frames.

    ``filter_result['score']`` is boolean-ish: True -> long, False -> short,
    NaN -> keep current position. ``score_result['score']`` is numeric and is
    partitioned by ``short_threshold``/``long_threshold``. When both results
    exist, the score-based frames are narrowed to the rows the filter already
    selected (intersection by index).
    """
    keep_result = pd.DataFrame()
    long_result = pd.DataFrame()
    short_result = pd.DataFrame()
    if pd_is_not_null(self.filter_result):
        # NaN score means "no opinion" -> keep the current position
        keep_result = self.filter_result[self.filter_result['score'].isna()]
        long_result = self.filter_result[self.filter_result['score'] == True]
        short_result = self.filter_result[self.filter_result['score'] == False]
    if pd_is_not_null(self.score_result):
        # score strictly between the thresholds -> keep
        score_keep_result = self.score_result[(self.score_result['score'] > self.short_threshold) & (
            self.score_result['score'] < self.long_threshold)]
        if pd_is_not_null(keep_result):
            # NOTE(review): .loc with another frame's index raises KeyError on
            # missing labels in modern pandas — assumes indexes align; verify
            keep_result = score_keep_result.loc[keep_result.index, :]
        else:
            keep_result = score_keep_result
        score_long_result = self.score_result[self.score_result['score'] >= self.long_threshold]
        if pd_is_not_null(long_result):
            long_result = score_long_result.loc[long_result.index, :]
        else:
            long_result = score_long_result
        score_short_result = self.score_result[self.score_result['score'] <= self.short_threshold]
        if pd_is_not_null(short_result):
            short_result = score_short_result.loc[short_result.index, :]
        else:
            short_result = score_short_result
    self.keep_df = self.normalize_result_df(keep_result)
    self.open_long_df = self.normalize_result_df(long_result)
    self.open_short_df = self.normalize_result_df(short_result)
def generate_targets(self):
    """Derive open-long/open-short frames from filter and/or score results.

    With both results present, the score thresholds are applied and then
    narrowed to the filter's picks; with only one result, that result alone
    decides the targets.
    """
    if pd_is_not_null(self.filter_result) and pd_is_not_null(
            self.score_result):
        # for long: filter says True AND score clears the long threshold
        result1 = self.filter_result[self.filter_result.score]
        result2 = self.score_result[
            self.score_result.score >= self.long_threshold]
        long_result = result2.loc[result1.index, :]
        # for short: filter says False AND score below the short threshold
        result1 = self.filter_result[~self.filter_result.score]
        result2 = self.score_result[
            self.score_result.score <= self.short_threshold]
        short_result = result2.loc[result1.index, :]
    elif pd_is_not_null(self.score_result):
        long_result = self.score_result[
            self.score_result.score >= self.long_threshold]
        short_result = self.score_result[
            self.score_result.score <= self.short_threshold]
    else:
        # BUG FIX: `series is True` compares object identity and is always
        # False, so this branch always produced empty frames. Element-wise
        # comparison (as the sibling selector uses) is intended here.
        long_result = self.filter_result[self.filter_result.score == True]
        short_result = self.filter_result[self.filter_result.score == False]
    self.open_long_df = self.normalize_result_df(long_result)
    self.open_short_df = self.normalize_result_df(short_result)
def run(self):
    """Incrementally fetch and persist fund listings per operate mode.

    For each mode, pages forward from the latest saved timestamp in windows
    of ``365 * year_count`` days, widening the window when a fetch comes back
    short, until the window reaches today.
    """
    # Fetch by category.
    # code    fund operate mode
    # 401001  open-end fund
    # 401002  closed-end fund
    # 401003  QDII
    # 401004  FOF
    # 401005  ETF
    # 401006  LOF
    for operate_mode_id in (401001, 401002, 401005):
        year_count = 2
        while True:
            # resume from the latest record already persisted for this mode
            latest = Fund.query_data(
                filters=[Fund.operate_mode_id == operate_mode_id],
                order=Fund.timestamp.desc(),
                limit=1,
                return_type="domain",
            )
            start_timestamp = "2000-01-01"
            if latest:
                start_timestamp = latest[0].timestamp
            end_timestamp = min(
                next_date(start_timestamp, 365 * year_count),
                now_pd_timestamp())
            df = run_query(
                table="finance.FUND_MAIN_INFO",
                conditions=
                f"operate_mode_id#=#{operate_mode_id}&start_date#>=#{to_time_str(start_timestamp)}&start_date#<=#{to_time_str(end_timestamp)}",
                parse_dates=["start_date", "end_date"],
                dtype={"main_code": str},
            )
            # widen the window when nothing (or an incomplete year) came back
            if not pd_is_not_null(df) or (df["start_date"].max().year <
                                          end_timestamp.year):
                year_count = year_count + 1
            if pd_is_not_null(df):
                df.rename(columns={"start_date": "timestamp"}, inplace=True)
                df["timestamp"] = pd.to_datetime(df["timestamp"])
                df["list_date"] = df["timestamp"]
                df["end_date"] = pd.to_datetime(df["end_date"])
                df["code"] = df["main_code"]
                df["entity_id"] = df["code"].apply(
                    lambda x: to_entity_id(entity_type="fund", jq_code=x))
                df["id"] = df["entity_id"]
                df["entity_type"] = "fund"
                df["exchange"] = "sz"
                df_to_db(df,
                         data_schema=Fund,
                         provider=self.provider,
                         force_update=self.force_update)
                self.logger.info(
                    f"persist fund {operate_mode_id} list success {start_timestamp} to {end_timestamp}"
                )
            # done with this mode once the window has caught up to today
            if is_same_date(end_timestamp, now_pd_timestamp()):
                break
def generate_targets(self):
    """Derive open-long/open-short frames from filter/score results, then
    restrict them to the portfolio selector's blocks when one is configured.
    """
    if pd_is_not_null(self.filter_result) and pd_is_not_null(
            self.score_result):
        # for long: filter True AND score above the long threshold
        result1 = self.filter_result[self.filter_result.score]
        result2 = self.score_result[
            self.score_result.score >= self.long_threshold]
        long_result = result2.loc[result1.index, :]
        # for short: filter False AND score below the short threshold
        result1 = self.filter_result[~self.filter_result.score]
        result2 = self.score_result[
            self.score_result.score <= self.short_threshold]
        short_result = result2.loc[result1.index, :]
    elif pd_is_not_null(self.score_result):
        long_result = self.score_result[
            self.score_result.score >= self.long_threshold]
        short_result = self.score_result[
            self.score_result.score <= self.short_threshold]
    else:
        long_result = self.filter_result[self.filter_result.score == True]
        short_result = self.filter_result[self.filter_result.score == False]
    # filter in blocks
    if self.portfolio_selector:
        if pd_is_not_null(self.portfolio_selector.open_long_df):
            # NOTE(review): the lambda ignores its argument and closes over
            # the outer frame — in_block presumably returns a boolean mask
            # aligned to long_result; confirm against in_block's contract
            long_result = long_result[lambda df: self.in_block(
                long_result, target_type=TargetType.open_long)]
        if pd_is_not_null(self.portfolio_selector.open_short_df):
            short_result = short_result[lambda df: self.in_block(
                short_result, target_type=TargetType.open_short)]
    self.open_long_df = self.normalize_result_df(long_result)
    self.open_short_df = self.normalize_result_df(short_result)
def run(self):
    """Incrementally fetch and persist fund listings per operate mode
    (region-aware variant).

    Pages forward from the latest saved timestamp in windows of
    ``365 * year_count`` days, widening the window when a fetch comes back
    short, until the window reaches today.
    """
    # Fetch by category.
    # code    fund operate mode
    # 401001  open-end fund
    # 401002  closed-end fund
    # 401003  QDII
    # 401004  FOF
    # 401005  ETF
    # 401006  LOF
    for operate_mode_id in (401001, 401002, 401005):
        year_count = 2
        while True:
            # resume from the latest record already persisted for this mode
            latest = Fund.query_data(
                region=self.region,
                filters=[Fund.operate_mode_id == operate_mode_id],
                order=Fund.timestamp.desc(),
                limit=1,
                return_type='domain')
            start_timestamp = '2000-01-01'
            if latest:
                start_timestamp = latest[0].timestamp
            end_timestamp = min(
                next_date(start_timestamp, 365 * year_count),
                now_pd_timestamp(self.region))
            df = jq_run_query(
                table='finance.FUND_MAIN_INFO',
                conditions=
                f'operate_mode_id#=#{operate_mode_id}&start_date#>=#{to_time_str(start_timestamp)}&start_date#<=#{to_time_str(end_timestamp)}',
                parse_dates=['start_date', 'end_date'],
                dtype={'main_code': str})
            # widen the window when nothing (or an incomplete year) came back
            if not pd_is_not_null(df) or (df['start_date'].max().year <
                                          end_timestamp.year):
                year_count = year_count + 1
            if pd_is_not_null(df):
                df.rename(columns={'start_date': 'timestamp'}, inplace=True)
                df['timestamp'] = pd.to_datetime(df['timestamp'])
                df['list_date'] = df['timestamp']
                df['end_date'] = pd.to_datetime(df['end_date'])
                df['code'] = df['main_code']
                df['entity_id'] = df['code'].apply(
                    lambda x: to_entity_id(entity_type='fund', jq_code=x))
                df['id'] = df['entity_id']
                df['entity_type'] = 'fund'
                df['exchange'] = 'sz'
                df_to_db(df,
                         ref_df=None,
                         region=self.region,
                         data_schema=Fund,
                         provider=self.provider)
                self.logger.info(
                    f'persist fund {operate_mode_id} list success {start_timestamp} to {end_timestamp}'
                )
            # done with this mode once the window has caught up to today
            if is_same_date(end_timestamp, now_pd_timestamp(self.region)):
                break
def run(self):
    """Aggregate every factor's result into filter/score frames, then select.

    Boolean filter results are combined with AND or OR depending on
    ``self.select_mode``; numeric score results are averaged. A factor whose
    result frame exists but holds no data raises, since silently skipping it
    would bias selection.
    """
    from functools import reduce

    if self.factors:
        filters = []
        scores = []
        for factor in self.factors:
            if is_filter_result_df(factor.result_df):
                df = factor.result_df[['filter_result']]
                if pd_is_not_null(df):
                    df.columns = ['score']
                    filters.append(df)
                else:
                    raise Exception('no data for factor:{},{}'.format(factor.factor_name, factor))
            if is_score_result_df(factor.result_df):
                df = factor.result_df[['score_result']]
                if pd_is_not_null(df):
                    df.columns = ['score']
                    scores.append(df)
                else:
                    raise Exception('no data for factor:{},{}'.format(factor.factor_name, factor))
        if filters:
            # reduce replaces list(accumulate(...))[-1], which materialized
            # every intermediate frame just to keep the final one
            if self.select_mode == SelectMode.condition_and:
                self.filter_result = reduce(operator.__and__, filters)
            else:
                self.filter_result = reduce(operator.__or__, filters)
        if scores:
            # element-wise mean of all score frames
            self.score_result = reduce(operator.__add__, scores) / len(scores)
        self.generate_targets()
def select_short_targets_from_levels(self, timestamp):
    """Return entity ids of current positions that should be sold.

    Since shorting is not allowed, only current holdings are examined: an
    entity is flagged when its close has stayed below the 5-day moving
    average for 3 consecutive days, with the signal landing on ``timestamp``.
    """
    # shorting is impossible — derive sell candidates from positions only
    positions = self.get_current_positions()
    if positions:
        entity_ids = [position.entity_id for position in positions]
        # sell on a confirmed break below the 5-day MA
        input_df = get_kdata(region=self.region,
                             entity_ids=entity_ids,
                             start_timestamp=timestamp - datetime.timedelta(20),
                             end_timestamp=timestamp,
                             columns=['entity_id', 'close'],
                             index=['entity_id', 'timestamp'])
        # per-entity rolling 5-day mean (level 0 of the MultiIndex is entity_id)
        ma_df = input_df['close'].groupby(level=0).rolling(window=5, min_periods=5).mean()
        ma_df = ma_df.reset_index(level=0, drop=True)
        input_df['ma5'] = ma_df
        s = input_df['close'] < input_df['ma5']
        input_df = s.to_frame(name='score')
        # 3 consecutive closes below the 5-day MA
        df = input_df['score'].groupby(level=0).rolling(window=3, min_periods=3).apply(
            lambda x: np.logical_and.reduce(x))
        df = df.reset_index(level=0, drop=True)
        input_df['score'] = df
        result_df = input_df[input_df['score'] == 1.0]
        if pd_is_not_null(result_df):
            # only signals that fire exactly at `timestamp` count
            short_df = result_df.loc[(slice(None), slice(timestamp, timestamp)), :]
            if pd_is_not_null(short_df):
                return short_df.index.get_level_values(0).tolist()
def record(self, entity, start, end, size, timestamps):
    """Fetch ``size`` kdata bars for ``entity`` from joinquant and persist them.

    For qfq (forward-adjusted) data, compares the first fetched close against
    the stored close at the same date; a mismatch means an adjustment event
    happened and previously saved qfq data must be recomputed.
    """
    if self.adjust_type == AdjustType.hfq:
        # hfq (backward-adjusted) prices are anchored at a fixed early date
        fq_ref_date = '2000-01-01'
    else:
        fq_ref_date = to_time_str(now_pd_timestamp())
    if not self.end_timestamp:
        df = get_bars(to_jq_entity_id(entity),
                      count=size,
                      unit=self.jq_trading_level,
                      fields=['date', 'open', 'close', 'low', 'high', 'volume', 'money'],
                      fq_ref_date=fq_ref_date,
                      include_now=self.real_time)
    else:
        end_timestamp = to_time_str(self.end_timestamp)
        df = get_bars(to_jq_entity_id(entity),
                      count=size,
                      unit=self.jq_trading_level,
                      fields=['date', 'open', 'close', 'low', 'high', 'volume', 'money'],
                      end_dt=end_timestamp,
                      fq_ref_date=fq_ref_date,
                      include_now=False)
    if pd_is_not_null(df):
        df['name'] = entity.name
        df.rename(columns={'money': 'turnover', 'date': 'timestamp'}, inplace=True)
        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['provider'] = 'joinquant'
        df['level'] = self.level.value
        df['code'] = entity.code
        # decide whether previously saved qfq data needs recomputation
        if self.adjust_type == AdjustType.qfq:
            check_df = df.head(1)
            check_date = check_df['timestamp'][0]
            current_df = get_kdata(entity_id=entity.id,
                                   provider=self.provider,
                                   start_timestamp=check_date,
                                   end_timestamp=check_date,
                                   limit=1,
                                   level=self.level,
                                   adjust_type=self.adjust_type)
            if pd_is_not_null(current_df):
                old = current_df.iloc[0, :]['close']
                new = check_df['close'][0]
                # different close at the same date -> qfq must be recomputed
                if round(old, 2) != round(new, 2):
                    qfq_factor = new / old
                    last_timestamp = pd.Timestamp(check_date)
                    self.recompute_qfq(entity,
                                       qfq_factor=qfq_factor,
                                       last_timestamp=last_timestamp)

        def generate_kdata_id(se):
            # day-level ids use the date only; intraday ids need full ISO time
            if self.level >= IntervalLevel.LEVEL_1DAY:
                return "{}_{}".format(se['entity_id'], to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))
            else:
                return "{}_{}".format(se['entity_id'], to_time_str(se['timestamp'], fmt=TIME_FORMAT_ISO8601))

        df['id'] = df[['entity_id', 'timestamp']].apply(generate_kdata_id, axis=1)
        df_to_db(df=df,
                 data_schema=self.data_schema,
                 provider=self.provider,
                 force_update=self.force_update)
    return None
def update_factor_details(region: Region, factor, entity_type, entity, levels,
                          columns, trader_index, schema_name):
    """Dash callback: build the kline graph for a factor/entity selection.

    Optionally overlays extra schema columns as a sub-graph and trading
    orders (from the selected trader) as annotations. Multiple levels are
    rendered as a stacked drawer. Raises ``dash.PreventUpdate`` when the
    selection is incomplete.
    """
    if factor and entity_type and entity and levels:
        sub_df = None
        # add sub graph from the requested schema columns
        if columns:
            if type(columns) == str:
                columns = [columns]
            columns = columns + ['entity_id', 'timestamp']
            schema: Mixin = get_schema_by_name(name=schema_name)
            sub_df = schema.query_data(region=region, entity_id=entity, columns=columns)
        # add trading signals as annotation
        annotation_df = None
        if trader_index is not None:
            order_reader = order_readers[trader_index]
            annotation_df = order_reader.data_df.copy()
            annotation_df = annotation_df[annotation_df.entity_id == entity].copy()
            if pd_is_not_null(annotation_df):
                annotation_df['value'] = annotation_df['order_price']
                annotation_df['flag'] = annotation_df['order_type'].apply(
                    lambda x: order_type_flag(x))
                annotation_df['color'] = annotation_df['order_type'].apply(
                    lambda x: order_type_color(x))
            # (debug print of annotation_df removed — it spammed stdout on
            # every callback invocation)
        if type(levels) is list and len(levels) >= 2:
            # multi-level: stack one drawer per level, smallest level first
            levels.sort()
            drawers = []
            for level in levels:
                drawers.append(zvt_context.factor_cls_registry[factor](
                    entity_schema=zvt_context.entity_schema_map[entity_type],
                    level=level,
                    entity_ids=[entity]).drawer())
            stacked = StackedDrawer(*drawers)
            return dcc.Graph(id=f'{factor}-{entity_type}-{entity}',
                             figure=stacked.draw_kline(show=False, height=900))
        else:
            if type(levels) is list:
                level = levels[0]
            else:
                level = levels
            drawer = zvt_context.factor_cls_registry[factor](
                entity_schema=zvt_context.entity_schema_map[entity_type],
                level=level,
                entity_ids=[entity],
                need_persist=False).drawer()
            if pd_is_not_null(sub_df):
                drawer.add_sub_df(sub_df)
            if pd_is_not_null(annotation_df):
                drawer.annotation_df = annotation_df
            return dcc.Graph(id=f'{factor}-{entity_type}-{entity}',
                             figure=drawer.draw_kline(show=False, height=800))
    raise dash.PreventUpdate()
def on_finish_entity(self, entity, http_session):
    """After recording an entity, backfill real publish timestamps.

    Rows whose ``timestamp`` still equals ``report_date`` have not been given
    the actual publish date yet; fill them from FinanceFactor when it already
    knows the publish date, otherwise fall back to querying joinquant.
    """
    super().on_finish_entity(entity, http_session)
    if not self.fetch_jq_timestamp:
        return

    # fill the timestamp for report published date
    the_data_list = get_data(
        region=self.region,
        data_schema=self.data_schema,
        provider=self.provider,
        entity_id=entity.id,
        order=self.data_schema.timestamp.asc(),
        return_type='domain',
        filters=[
            # timestamp == report_date marks rows not yet backfilled
            self.data_schema.timestamp == self.data_schema.report_date
        ])
    if the_data_list:
        if self.data_schema == FinanceFactor:
            # FinanceFactor itself is the source of truth -> must ask jq
            for the_data in the_data_list:
                self.fill_timestamp_with_jq(entity, the_data)
        else:
            # other schemas can copy the publish date from FinanceFactor
            df = FinanceFactor.query_data(
                region=self.region,
                entity_id=entity.id,
                columns=[
                    FinanceFactor.timestamp, FinanceFactor.report_date,
                    FinanceFactor.id
                ],
                filters=[
                    FinanceFactor.timestamp != FinanceFactor.report_date,
                    FinanceFactor.report_date >= the_data_list[0].report_date,
                    FinanceFactor.report_date <= the_data_list[-1].report_date
                ])
            if pd_is_not_null(df):
                index_df(df, index='report_date', time_field='report_date')
            for the_data in the_data_list:
                if pd_is_not_null(df) and the_data.report_date in df.index:
                    the_data.timestamp = df.at[the_data.report_date, 'timestamp']
                    self.logger.info(
                        'db fill {} {} timestamp:{} for report_date:{}'.
                        format(self.data_schema, entity.id,
                               the_data.timestamp, the_data.report_date))
                    session = get_db_session(region=self.region,
                                             provider=self.provider,
                                             data_schema=self.data_schema)
                    session.commit()
                else:
                    # self.logger.info(
                    #     'waiting jq fill {} {} timestamp:{} for report_date:{}'.format(self.data_schema,
                    #                                                                    security_item.id,
                    #                                                                    the_data.timestamp,
                    #                                                                    the_data.report_date))
                    self.fill_timestamp_with_jq(entity, the_data)
def acc(self, input_df: pd.DataFrame, acc_df: pd.DataFrame, states: dict) -> (pd.DataFrame, dict):
    """Accumulate per-entity results on top of previous output.

    :param input_df: new input, MultiIndex (entity_id, timestamp)
    :param acc_df: previous result, same index layout
    :param states: current states of each entity, keyed by entity_id
    :return: new result and states

    Dispatches to ``acc_one`` per entity; the single-entity case is handled
    without a groupby-apply for efficiency.
    """
    g = input_df.groupby(level=0)
    if len(g.groups) == 1:
        # fast path: only one entity in the input
        entity_id = input_df.index[0][0]

        df = input_df.reset_index(level=0, drop=True)
        # reuse previous result only if it belongs to the same entity
        if pd_is_not_null(acc_df) and (entity_id == acc_df.index[0][0]):
            acc_one_df = acc_df.reset_index(level=0, drop=True)
        else:
            acc_one_df = None
        ret_df, state = self.acc_one(entity_id=entity_id, df=df, acc_df=acc_one_df, state=states.get(entity_id))
        if pd_is_not_null(ret_df):
            # rebuild the (entity_id, timestamp) MultiIndex
            ret_df["entity_id"] = entity_id
            ret_df = ret_df.set_index("entity_id", append=True).swaplevel(0, 1)
            # NOTE(review): entity_id is also re-added as a plain column after
            # becoming an index level — looks intentional (downstream may read
            # the column), but confirm
            ret_df["entity_id"] = entity_id
            return ret_df, {entity_id: state}
        return None, {entity_id: state}
    else:
        new_states = {}

        def cal_acc(x):
            # per-group accumulation; x is the sub-frame of one entity
            entity_id = x.index[0][0]
            if pd_is_not_null(acc_df):
                acc_g = acc_df.groupby(level=0)
                acc_one_df = None
                if entity_id in acc_g.groups:
                    acc_one_df = acc_g.get_group(entity_id)
                    if pd_is_not_null(acc_one_df):
                        acc_one_df = acc_one_df.reset_index(level=0, drop=True)
            else:
                acc_one_df = None

            one_result, state = self.acc_one(
                entity_id=entity_id,
                df=x.reset_index(level=0, drop=True),
                acc_df=acc_one_df,
                state=states.get(x.index[0][0]),
            )
            # collect new state as a side effect of the apply
            new_states[entity_id] = state
            return one_result

        ret_df = g.apply(lambda x: cal_acc(x))
        return ret_df, new_states
def do_compute(self):
    """Run the stateless transform, then the stateful accumulation step."""
    # stateless transformation of the loaded data
    if self.transformer and pd_is_not_null(self.data_df):
        self.pipe_df = self.transformer.transform(self.data_df)
    # stateful accumulation on top of the previous factor result
    if self.accumulator and pd_is_not_null(self.pipe_df):
        self.factor_df = self.accumulator.acc(self.pipe_df, self.factor_df)
    else:
        self.factor_df = self.pipe_df
def compute_factor(self):
    """Compute pipe_df and factor_df from data_df.

    Applies the stateless transformer first, then the stateful accumulator;
    each stage falls through to its input when not configured or when there
    is no data.
    """
    if self.not_load_data:
        return
    # stage 1: stateless transformation
    if self.transformer and pd_is_not_null(self.data_df):
        self.pipe_df = self.transformer.transform(self.data_df)
    else:
        self.pipe_df = self.data_df
    # stage 2: stateful accumulation (also updates per-entity states)
    if self.accumulator and pd_is_not_null(self.pipe_df):
        self.factor_df, self.states = self.accumulator.acc(
            self.pipe_df, self.factor_df, self.states)
    else:
        self.factor_df = self.pipe_df
def record(self, entity, start, end, size, timestamps, http_session):
    """Fetch ``size`` bars for ``entity`` and, for qfq data, trigger a
    recomputation of stored forward-adjusted prices when an adjustment event
    is detected. Returns the fetched frame (or None)."""
    if self.adjust_type == AdjustType.hfq:
        # hfq (backward-adjusted) prices are anchored at a fixed early date
        fq_ref_date = '2000-01-01'
    else:
        fq_ref_date = to_time_str(now_pd_timestamp(Region.CHN))

    if not self.end_timestamp:
        df = jq_get_bars(to_jq_entity_id(entity),
                         count=size,
                         unit=self.jq_trading_level,
                         # fields=['date', 'open', 'close', 'low', 'high', 'volume', 'money'],
                         fq_ref_date=fq_ref_date)
    else:
        end_timestamp = to_time_str(self.end_timestamp)
        df = jq_get_bars(to_jq_entity_id(entity),
                         count=size,
                         unit=self.jq_trading_level,
                         # fields=['date', 'open', 'close', 'low', 'high', 'volume', 'money'],
                         end_date=end_timestamp,
                         fq_ref_date=fq_ref_date)

    # self.logger.info("record {} for {}, size:{}".format(self.data_schema.__name__, entity.id, len(df)))

    if pd_is_not_null(df):
        # start_timestamp = to_time_str(df.iloc[1]['timestamp'])
        # end_timestamp = to_time_str(df.iloc[-1]['timestamp'])

        # decide whether previously saved qfq data needs recomputation
        if self.adjust_type == AdjustType.qfq:
            check_df = df.head(1)
            check_date = check_df['timestamp'][0]
            current_df = get_kdata(region=self.region,
                                   entity_id=entity.id,
                                   provider=self.provider,
                                   start_timestamp=check_date,
                                   end_timestamp=check_date,
                                   limit=1,
                                   level=self.level,
                                   adjust_type=self.adjust_type)
            if pd_is_not_null(current_df):
                old = current_df.iloc[0, :]['close']
                new = check_df['close'][0]
                # different close at the same date -> qfq must be recomputed
                if round(old, 2) != round(new, 2):
                    qfq_factor = new / old
                    last_timestamp = pd.Timestamp(check_date)
                    self.recompute_qfq(entity,
                                       qfq_factor=qfq_factor,
                                       last_timestamp=last_timestamp)
        return df
    return None
def get_entity_ids(entity_type='stock', entity_schema: TradableEntity = None, exchanges=None, codes=None, provider=None, filters=None):
    """Query tradable entities and return their ids as a list.

    Returns None when no entity matches the query.
    """
    entities = get_entities(entity_type=entity_type,
                            entity_schema=entity_schema,
                            exchanges=exchanges,
                            codes=codes,
                            provider=provider,
                            filters=filters)
    if not pd_is_not_null(entities):
        return None
    return entities['entity_id'].to_list()
def draw(self, render='html', file_name=None, width=None, height=None,
         title=None, keep_ui_state=True, annotation_df=None,
         target_type: TargetType = TargetType.open_long):
    """Render the selected target frame (open-long or open-short) as a table.

    :raises ValueError: for a target_type with no backing frame here —
        previously such a value left ``df`` unbound and raised NameError.
    """
    if target_type == TargetType.open_long:
        df = self.open_long_df
    elif target_type == TargetType.open_short:
        df = self.open_short_df
    else:
        raise ValueError(f'unsupported target_type: {target_type}')
    # copy only after the null check: the original copied (and mutated)
    # unconditionally, crashing when the frame was None
    if pd_is_not_null(df):
        df = df.copy()
        df['target_type'] = target_type.value
        df = df.reset_index(drop=False)
        drawer = Drawer(df)
        drawer.draw_table(width=width, height=height, title=title,
                          keep_ui_state=keep_ui_state)
def load_factor(self):
    """Load previously persisted factor data into ``self.factor_df``.

    In dry-run mode only the trailing ``acc_window`` rows are loaded (just
    enough for the accumulator to continue); otherwise the full requested
    range is read. JSON-encoded columns are decoded with their registered
    object hooks.
    """
    if self.dry_run:
        #: only computing the factor -> the acc_window tail of factor_df suffices
        if self.accumulator is not None:
            self.factor_df = self.load_window_df(
                provider="zvt",
                data_schema=self.factor_schema,
                window=self.accumulator.acc_window)
    else:
        self.factor_df = get_data(
            provider="zvt",
            data_schema=self.factor_schema,
            start_timestamp=self.start_timestamp,
            entity_ids=self.entity_ids,
            end_timestamp=self.end_timestamp,
            index=[self.category_field, self.time_field],
        )

    # decode columns that were persisted as JSON strings
    col_map_object_hook = self.factor_col_map_object_hook()
    if pd_is_not_null(self.factor_df) and col_map_object_hook:
        for col in col_map_object_hook:
            if col in self.factor_df.columns:
                self.factor_df[col] = self.factor_df[col].apply(
                    lambda x: json.loads(
                        x, object_hook=col_map_object_hook.get(col))
                    if x else None)
def persist_factor(self): df = self.factor_df.copy() #: encode json columns if pd_is_not_null(df) and self.factor_col_map_object_hook(): for col in self.factor_col_map_object_hook(): if col in df.columns: df[col] = df[col].apply( lambda x: json.dumps(x, cls=self.state_encoder())) if self.states: g = df.groupby(level=0) for entity_id in self.states: state = self.states[entity_id] try: if state: self.persist_state(entity_id=entity_id) if entity_id in g.groups: df_to_db(df=df.loc[(entity_id, )], data_schema=self.factor_schema, provider="zvt", force_update=False) except Exception as e: self.logger.error( f"{self.name} {entity_id} save state error") self.logger.exception(e) #: clear them if error happen self.clear_state_data(entity_id) else: df_to_db(df=df, data_schema=self.factor_schema, provider="zvt", force_update=False)
def on_finish_entity(self, entity, http_session):
    """Backfill missing cumulative net values on an ETF's daily kdata.

    Finds daily rows with a NULL ``cumulative_net_value``, fetches the
    cumulative NAV series from eastmoney for that date span, and writes the
    matched values back through the ORM session.
    """
    kdatas = get_kdata(region=self.region,
                       provider=self.provider,
                       entity_id=entity.id,
                       level=IntervalLevel.LEVEL_1DAY.value,
                       order=Etf1dKdata.timestamp.asc(),
                       return_type='domain',
                       filters=[Etf1dKdata.cumulative_net_value.is_(None)])

    if kdatas and len(kdatas) > 0:
        start = kdatas[0].timestamp
        end = kdatas[-1].timestamp

        # fetch cumulative NAV from eastmoney (从东方财富获取基金累计净值)
        df = self.fetch_cumulative_net_value(entity, start, end, http_session)

        if pd_is_not_null(df):
            for kdata in kdatas:
                if kdata.timestamp in df.index:
                    # LJJZ: cumulative NAV; JZZZL: NAV growth rate
                    kdata.cumulative_net_value = df.loc[kdata.timestamp, 'LJJZ']
                    kdata.change_pct = df.loc[kdata.timestamp, 'JZZZL']
            session = get_db_session(region=self.region,
                                     provider=self.provider,
                                     data_schema=self.data_schema)
            session.commit()
            self.logger.info(f'{entity.code} - {entity.name}累计净值更新完成...')
def register_data_listener(self, listener):
    """Register ``listener`` for data events (duplicates are ignored) and
    immediately replay any already-loaded data to it."""
    already_known = listener in self.data_listeners
    if not already_known:
        self.data_listeners.append(listener)
    # notify it once after registered
    if pd_is_not_null(self.data_df):
        listener.on_data_loaded(self.data_df)
def record(self, entity, start, end, size, timestamps):
    """Fetch HK stock-connect holding info for each timestamp and persist it.

    (A leftover ``print(df)`` that dumped every fetched frame to stdout has
    been removed.)
    """
    for timestamp in timestamps:
        df = run_query(
            table='finance.STK_HK_HOLD_INFO',
            conditions=
            f'link_id#=#{entity.code}&day#=#{to_time_str(timestamp)}')

        if pd_is_not_null(df):
            df.rename(columns={
                'day': 'timestamp',
                'link_id': 'holder_code',
                'link_name': 'holder_name'
            },
                      inplace=True)
            df['timestamp'] = pd.to_datetime(df['timestamp'])
            df['entity_id'] = df['code'].apply(
                lambda x: to_entity_id(entity_type='stock', jq_code=x))
            # strip the exchange suffix from the jq code
            df['code'] = df['code'].apply(lambda x: x.split('.')[0])
            # id format: {holder_name}_{entity_id}_{timestamp}
            df['id'] = df[['holder_name', 'entity_id', 'timestamp']].apply(
                lambda se: "{}_{}_{}".format(
                    se['holder_name'], se['entity_id'],
                    to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY)),
                axis=1)

            df_to_db(df=df,
                     data_schema=self.data_schema,
                     provider=self.provider,
                     force_update=self.force_update)
def record(self, entity, start, end, size, timestamps):
    """Fetch HK stock-connect holding info for each timestamp and persist it.

    (A leftover ``print(df)`` that dumped every fetched frame to stdout has
    been removed.)
    """
    for timestamp in timestamps:
        df = run_query(
            table="finance.STK_HK_HOLD_INFO",
            conditions=
            f"link_id#=#{entity.code}&day#=#{to_time_str(timestamp)}")

        if pd_is_not_null(df):
            df.rename(columns={
                "day": "timestamp",
                "link_id": "holder_code",
                "link_name": "holder_name"
            },
                      inplace=True)
            df["timestamp"] = pd.to_datetime(df["timestamp"])
            df["entity_id"] = df["code"].apply(
                lambda x: to_entity_id(entity_type="stock", jq_code=x))
            # strip the exchange suffix from the jq code
            df["code"] = df["code"].apply(lambda x: x.split(".")[0])
            # id format: {holder_name}_{entity_id}_{timestamp}
            df["id"] = df[["holder_name", "entity_id", "timestamp"]].apply(
                lambda se: "{}_{}_{}".format(
                    se["holder_name"], se["entity_id"],
                    to_time_str(se["timestamp"], fmt=TIME_FORMAT_DAY)),
                axis=1,
            )

            df_to_db(df=df,
                     data_schema=self.data_schema,
                     provider=self.provider,
                     force_update=self.force_update)
def record(self, entity, start, end, size, timestamps):
    """Fetch an ETF's stock portfolio published since ``start`` and persist it."""
    q = query(finance.FUND_PORTFOLIO_STOCK).filter(finance.FUND_PORTFOLIO_STOCK.pub_date >= start).filter(
        finance.FUND_PORTFOLIO_STOCK.code == entity.code)
    df = finance.run_query(q)
    if pd_is_not_null(df):
        # sample rows returned by the query:
        #          id    code period_start  period_end    pub_date  report_type_id report_type  rank  symbol  name      shares    market_cap  proportion
        # 0   8640569  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     1  601318  中国平安  19869239.0  1.361043e+09        7.09
        # 1   8640570  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     2  600519  贵州茅台    921670.0  6.728191e+08        3.50
        # 2   8640571  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     3  600036  招商银行  18918815.0  5.806184e+08        3.02
        # 3   8640572  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     4  601166  兴业银行  22862332.0  3.646542e+08        1.90
        df['timestamp'] = pd.to_datetime(df['pub_date'])

        df.rename(columns={'symbol': 'stock_code', 'name': 'stock_name'}, inplace=True)
        # proportion arrives as a percentage -> convert to a fraction
        df['proportion'] = df['proportion'] * 0.01

        df = portfolio_relate_stock(df, entity)

        df['stock_id'] = df['stock_code'].apply(lambda x: china_stock_code_to_id(x))
        # id combines fund entity, stock, publish date and the source row id
        df['id'] = df[['entity_id', 'stock_id', 'pub_date', 'id']].apply(lambda x: '_'.join(x.astype(str)), axis=1)
        df['report_date'] = pd.to_datetime(df['period_end'])
        df['report_period'] = df['report_type'].apply(lambda x: jq_to_report_period(x))

        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update)

        # self.logger.info(df.tail())
        self.logger.info(f"persist etf {entity.code} portfolio success")

    return None
def load_factor(self):
    """Load persisted per-entity states and factor data.

    States are decoded first; then either the trailing ``acc_window`` rows
    (dry-run) or the full requested range of factor_df is read, and
    JSON-encoded columns are decoded with their registered object hooks.
    """
    # read state
    states: List[FactorState] = FactorState.query_data(
        filters=[FactorState.factor_name == self.factor_name],
        entity_ids=self.entity_ids,
        return_type='domain')
    if states:
        for state in states:
            self.states[state.entity_id] = self.decode_state(state.state)

    if self.dry_run:
        # only computing the factor -> the acc_window tail of factor_df suffices
        if self.accumulator is not None:
            self.factor_df = self.load_window_df(
                provider='zvt',
                data_schema=self.factor_schema,
                window=self.accumulator.acc_window)
    else:
        self.factor_df = get_data(
            provider='zvt',
            data_schema=self.factor_schema,
            start_timestamp=self.start_timestamp,
            entity_ids=self.entity_ids,
            end_timestamp=self.end_timestamp,
            index=[self.category_field, self.time_field])

    # decode columns that were persisted as JSON strings
    col_map_object_hook = self.factor_col_map_object_hook()
    if pd_is_not_null(self.factor_df) and col_map_object_hook:
        for col in col_map_object_hook:
            if col in self.factor_df.columns:
                self.factor_df[col] = self.factor_df[col].apply(
                    lambda x: json.loads(
                        x, object_hook=col_map_object_hook.get(col))
                    if x else None)
def run(self):
    """Fetch the 10-year CGB yield (EMM00166466) since the last saved point
    and persist it as daily kdata."""
    from zvt.api import get_kdata

    bond_data = get_kdata(entity_id='bond_cn_EMM00166466')
    now_date = to_time_str(now_pd_timestamp())
    # get_kdata may return None when nothing is stored yet; the old
    # `bond_data.empty` check raised AttributeError in that case
    if not pd_is_not_null(bond_data):
        # the series starts in 2007
        start = '2007-01-01'
    else:
        start = to_time_str(bond_data.timestamp.max())
    # EMM00166466: 10-year China government bond yield to maturity
    df = c.edb("EMM00166466", f"IsLatest=0,StartDate={start},EndDate={now_date},ispandas=1")

    if pd_is_not_null(df):
        df['name'] = "中债国债到期收益率:10年"
        df.rename(columns={'RESULT': 'data_value', 'DATES': 'timestamp'}, inplace=True)
        df['entity_id'] = 'bond_cn_EMM00166466'
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['provider'] = 'emquantapi'
        df['exchange'] = 'cn'
        df['level'] = '1d'
        df['code'] = "EMM00166466"

        def generate_kdata_id(se):
            # one row per day -> id is entity_id + date
            return "{}_{}".format(se['entity_id'], to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))

        df['id'] = df[['entity_id', 'timestamp']].apply(generate_kdata_id, axis=1)
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update)
def record(self, entity, start, end, size, timestamps):
    """Fetch fund dividend details published since ``start`` and persist them.

    Removed dead work from the original: the ``df['id']`` assignment was
    duplicated and an extra ``reset_index`` ran before the one that matters.
    """
    if not end:
        end = to_time_str(now_pd_timestamp())
    start = to_time_str(start)
    df = finance.run_query(
        query(finance.FUND_DIVIDEND).filter(
            finance.FUND_DIVIDEND.code == entity.code,
            finance.FUND_DIVIDEND.pub_date >= start).limit(20))
    df.rename(columns=FundDividendDetail.get_data_map(self), inplace=True)
    # rows without a dividend date carry no usable event
    df.dropna(subset=['dividend_date'], inplace=True)
    if pd_is_not_null(df):
        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df.announce_date)
        df['provider'] = 'joinquant'
        df['code'] = entity.code

        def generate_id(se):
            # se.name is the 1-based positional index set below
            return "{}_{}_{}".format(
                se['entity_id'],
                to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY), se.name)

        # 1-based positional index so ids are stable within this batch
        df.reset_index(drop=True, inplace=True)
        df.index += 1
        df['id'] = df[['entity_id', 'timestamp']].apply(generate_id, axis=1)
        df_to_db(df=df,
                 data_schema=self.data_schema,
                 provider=self.provider,
                 force_update=self.force_update)
    return None
def get_trading_signals_figure(order_reader: OrderReader, entity_id: str,
                               start_timestamp=None, end_timestamp=None,
                               adjust_type=None):
    """Build a kline figure for ``entity_id`` with trading orders annotated.

    Falls back to the order reader's own time range when no explicit range is
    given. (A leftover ``print(df.tail())`` debug statement was removed.)
    """
    entity_type, _, _ = decode_entity_id(entity_id)

    data_schema = get_kdata_schema(entity_type=entity_type,
                                   level=order_reader.level,
                                   adjust_type=adjust_type)
    if not start_timestamp:
        start_timestamp = order_reader.start_timestamp
    if not end_timestamp:
        end_timestamp = order_reader.end_timestamp
    kdata_reader = DataReader(
        entity_ids=[entity_id],
        data_schema=data_schema,
        entity_schema=zvt_context.tradable_schema_map.get(entity_type),
        start_timestamp=start_timestamp,
        end_timestamp=end_timestamp,
        level=order_reader.level,
    )

    # generate the annotation df from the orders of this entity
    order_reader.move_on(timeout=0)
    df = order_reader.data_df.copy()
    df = df[df.entity_id == entity_id].copy()
    if pd_is_not_null(df):
        df["value"] = df["order_price"]
        df["flag"] = df["order_type"].apply(lambda x: order_type_flag(x))
        df["color"] = df["order_type"].apply(lambda x: order_type_color(x))

    drawer = Drawer(main_df=kdata_reader.data_df, annotation_df=df)
    return drawer.draw_kline(show=False, height=800)
def on_trading_signal(self, trading_signal: TradingSignal):
    """Translate a trading signal into an order at the signal-time close price.

    Looks up the kdata bar at the signal's happen_timestamp; missing kdata or
    an unusable price only logs a warning, while a kdata query failure raises
    ``WrongKdataError``.
    """
    entity_id = trading_signal.entity_id
    happen_timestamp = trading_signal.happen_timestamp
    order_type = AccountService.trading_signal_to_order_type(trading_signal.trading_signal_type)
    trading_level = trading_signal.trading_level.value
    if order_type:
        try:
            kdata = get_kdata(provider=self.provider,
                              entity_id=entity_id,
                              level=trading_level,
                              start_timestamp=happen_timestamp,
                              end_timestamp=happen_timestamp,
                              limit=1,
                              adjust_type=self.adjust_type)
        except Exception as e:
            self.logger.error(e)
            raise WrongKdataError("could not get kdata")

        if pd_is_not_null(kdata):
            entity_type, _, _ = decode_entity_id(kdata['entity_id'][0])

            the_price = kdata['close'][0]

            # NOTE(review): a falsy close (0 / NaN) is treated as "wrong
            # kdata" — presumably a 0 price never occurs legitimately; confirm
            if the_price:
                self.order(entity_id=entity_id,
                           current_price=the_price,
                           current_timestamp=happen_timestamp,
                           order_pct=trading_signal.position_pct,
                           order_money=trading_signal.order_money,
                           order_type=order_type)
            else:
                self.logger.warning(
                    'ignore trading signal,wrong kdata,entity_id:{},timestamp:{},kdata:{}'.format(entity_id,
                                                                                                  happen_timestamp,
                                                                                                  kdata.to_dict(
                                                                                                      orient='records')))
        else:
            self.logger.warning(
                'ignore trading signal,could not get kdata,entity_id:{},timestamp:{}'.format(entity_id,
                                                                                             happen_timestamp))
def filter_selector_long_targets(self, timestamp, selector: TargetSelector,
                                 long_targets: List[str]) -> List[str]:
    """Post-filter daily-level selector picks.

    Keeps only entities whose last three candles all closed above their open
    with strictly rising highs; non-daily selectors pass through unchanged.
    """
    if selector.level != IntervalLevel.LEVEL_1DAY:
        return long_targets
    if not long_targets:
        return None
    entity_ids = []
    for entity_id in long_targets:
        # fetch ~20 calendar days of kdata; only the last 3 bars are used
        df = get_kdata(
            region=self.region,
            entity_id=entity_id,
            start_timestamp=timestamp - datetime.timedelta(20),
            end_timestamp=timestamp,
            columns=['entity_id', 'close', 'open', 'high', 'low'])
        if pd_is_not_null(df) and len(df) >= 3:
            df = df.iloc[-3:]
            # all three candles bullish (close above open)
            positive = np.all(df['close'] > df['open'])
            # strictly higher highs; use positional access — the original
            # `df['high'][0]` relied on the integer fallback of label
            # indexing, which raises KeyError on non-integer indexes in
            # modern pandas
            highs = df['high'].to_list()
            trending = highs[0] < highs[1] < highs[2]
            if positive and trending:
                entity_ids.append(entity_id)
    return entity_ids