def record(self, entity, start, end, size, timestamps):
    """Fetch dragon-and-tiger (龙虎榜) board entries for one stock and persist them.

    Each board entry becomes one row; per trading-department columns are added
    dynamically (buy side ``depN``, sell side ``dep_N``).
    """
    start_date = to_time_str(next_date(start)) if start else None
    datas = em_api.get_dragon_and_tiger(code=entity.code, start_date=start_date)
    if not datas:
        self.logger.info(f"no data for {entity.id}")
        return
    rows = []
    for data in datas:
        timestamp = to_pd_timestamp(data["TRADE_DATE"])
        row = {
            "id": "{}_{}_{}".format(
                entity.id, data["TRADE_ID"], to_time_str(timestamp, fmt=TIME_FORMAT_DAY)),
            "entity_id": entity.id,
            "timestamp": timestamp,
            "code": entity.code,
            "name": entity.name,
            "reason": data["EXPLANATION"],
            "turnover": data["ACCUM_AMOUNT"],
            "change_pct": data["CHANGE_RATE"],
            "net_in": data["NET_BUY"],
        }
        # trading department list
        for dep in data["LIST"]:
            # direction "0" is the buy side ("depN"); anything else the sell side ("dep_N")
            prefix = "dep" if dep["TRADE_DIRECTION"] == "0" else "dep_"
            dep_name = f"{prefix}{dep['RANK']}"
            row[dep_name] = dep["OPERATEDEPT_NAME"]
            row[f"{dep_name}_in"] = dep["BUY_AMT_REAL"]
            row[f"{dep_name}_out"] = dep["SELL_AMT_REAL"]
            # falsy (None/0) ratios are treated as 0
            buy_ratio = dep["BUY_RATIO"] if dep["BUY_RATIO"] else 0
            sell_ratio = dep["SELL_RATIO"] if dep["SELL_RATIO"] else 0
            row[f"{dep_name}_rate"] = buy_ratio - sell_ratio
        rows.append(row)
    df = pd.DataFrame.from_records(rows)
    df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
             force_update=self.force_update)
def get_dragon_and_tiger_list(start_date, end_date=None):
    """Query dragon-and-tiger board details in [start_date, end_date].

    end_date defaults to now when omitted. Results are sorted ascending by
    trade date, then security code.
    """
    start_date = to_time_str(start_date)
    end_date = to_time_str(end_date if end_date else now_timestamp())
    return get_em_data(
        request_type="RPT_DAILYBILLBOARD_DETAILS",
        fields="ALL",
        source="DataCenter",
        filters=f"(TRADE_DATE>='{start_date}')(TRADE_DATE<='{end_date}')",
        sort_by="TRADE_DATE,SECURITY_CODE",
        sort="asc,asc",
    )
def record(self, entity, start, end, size, timestamps):
    """Aggregate per-stock money-flow rows into one index-level row per day.

    The index code selects the stock universe to aggregate: inflow amount
    columns are summed and inflow-rate columns are averaged across members.
    """
    # Shanghai composite
    if entity.code == '000001':
        all_df = StockMoneyFlow.query_data(
            provider=self.provider, start_timestamp=start,
            filters=[StockMoneyFlow.entity_id.like('stock_sh%')])
    # Shenzhen component
    elif entity.code == '399001':
        all_df = StockMoneyFlow.query_data(
            provider=self.provider, start_timestamp=start,
            filters=[StockMoneyFlow.entity_id.like('stock_sz%')])
    # ChiNext
    elif entity.code == '399006':
        all_df = StockMoneyFlow.query_data(
            provider=self.provider, start_timestamp=start,
            filters=[StockMoneyFlow.code.like('300%')])
    # STAR market (added for consistency with the sibling recorder)
    elif entity.code == '000688':
        all_df = StockMoneyFlow.query_data(
            provider=self.provider, start_timestamp=start,
            filters=[StockMoneyFlow.code.like('688%')])
    else:
        # Fix: an unsupported index code previously fell through and raised
        # NameError on all_df below.
        return None
    if pd_is_not_null(all_df):
        g = all_df.groupby('timestamp')
        for timestamp, df in g:
            se = pd.Series({
                'id': "{}_{}".format(entity.id, to_time_str(timestamp)),
                'entity_id': entity.id,
                'timestamp': timestamp,
                'code': entity.code,
                'name': entity.name
            })
            # amounts: summed over member stocks
            for col in [
                'net_main_inflows', 'net_huge_inflows', 'net_big_inflows',
                'net_medium_inflows', 'net_small_inflows'
            ]:
                se[col] = df[col].sum()
            # rates: simple average over member stocks
            for col in [
                'net_main_inflow_rate', 'net_huge_inflow_rate',
                'net_big_inflow_rate', 'net_medium_inflow_rate',
                'net_small_inflow_rate'
            ]:
                se[col] = df[col].sum() / len(df)
            index_df = se.to_frame().T
            self.logger.info(index_df)
            df_to_db(df=index_df, data_schema=self.data_schema,
                     provider=self.provider, force_update=self.force_update)
    return None
def run(self):
    """Tag every pending timestamp in order, persisting progress after each one."""
    for timestamp in self.get_tag_timestamps():
        logger.info(f"tag to {timestamp}")
        self.tag(timestamp=timestamp)
        # remember how far we got so an interrupted run can resume
        self.state = {"current_timestamp": to_time_str(timestamp)}
        self.persist_state()
def record(self, entity, start, end, size, timestamps):
    """Persist the top holders of the stock per report date, registering any
    actors (institutions by code, individuals by name) seen along the way."""

    def build_actor(item):
        # Institutional holder ('1'): reuse the stored ActorMeta when present.
        if item['IS_HOLDORG'] == '1':
            domains: List[ActorMeta] = ActorMeta.query_data(
                filters=[ActorMeta.code == item['HOLDER_CODE']], return_type='domain')
            if domains:
                return domains[0]
            actor_type = ActorType.corporation.value
            return ActorMeta(entity_id=f'{actor_type}_cn_{item["HOLDER_CODE"]}',
                             id=f'{actor_type}_cn_{item["HOLDER_CODE"]}',
                             entity_type=actor_type,
                             exchange='cn',
                             code=item["HOLDER_CODE"],
                             name=item["HOLDER_NAME"])
        # Individual holders carry no code, so they are keyed by name.
        actor_type = ActorType.individual.value
        return ActorMeta(entity_id=f'{actor_type}_cn_{item["HOLDER_NAME"]}',
                         id=f'{actor_type}_cn_{item["HOLDER_NAME"]}',
                         entity_type=actor_type,
                         exchange='cn',
                         code=item["HOLDER_NAME"],
                         name=item["HOLDER_NAME"])

    for timestamp in timestamps:
        the_date = to_time_str(timestamp)
        result = get_holders(code=entity.code, end_date=the_date)
        if not result:
            continue
        holders = []
        new_actors = []
        for item in result:
            actor = build_actor(item)
            new_actors.append(actor.__dict__)
            holders.append({'id': f'{entity.entity_id}_{the_date}_{actor.entity_id}',
                            'entity_id': entity.entity_id,
                            'timestamp': timestamp,
                            'code': entity.code,
                            'name': entity.name,
                            'actor_id': actor.entity_id,
                            'actor_type': actor.entity_type,
                            'actor_code': actor.code,
                            'actor_name': actor.name,
                            'report_date': timestamp,
                            'report_period': to_report_period_type(timestamp),
                            'holding_numbers': item['HOLD_NUM'],
                            'holding_ratio': value_to_pct(item['HOLD_NUM_RATIO'], default=0)})
        if holders:
            df_to_db(data_schema=self.data_schema, df=pd.DataFrame.from_records(holders),
                     provider=self.provider, force_update=True)
        if new_actors:
            df_to_db(data_schema=ActorMeta, df=pd.DataFrame.from_records(new_actors),
                     provider=self.provider, force_update=False)
def record(self, entity, start, end, size, timestamps):
    """Persist per-institution holdings for each report date, and register the
    holding institutions as ActorMeta rows."""
    for timestamp in timestamps:
        the_date = to_time_str(timestamp)
        self.logger.info(f"to {entity.code} {the_date}")
        for actor_type in ActorType:
            # these two types are not covered by the institutional-holder report
            if actor_type in (ActorType.private_equity, ActorType.individual):
                continue
            result = get_ii_holder(
                code=entity.code, report_date=the_date,
                org_type=actor_type_to_org_type(actor_type)
            )
            if not result:
                continue
            holders = []
            actors = []
            for item in result:
                actor_id = f'{actor_type.value}_cn_{item["HOLDER_CODE"]}'
                holders.append(
                    {
                        "id": f"{entity.entity_id}_{the_date}_{actor_id}",
                        "entity_id": entity.entity_id,
                        "timestamp": timestamp,
                        "code": entity.code,
                        "name": entity.name,
                        "actor_id": actor_id,
                        "actor_type": actor_type.value,
                        "actor_code": item["HOLDER_CODE"],
                        "actor_name": f'{item["HOLDER_NAME"]}',
                        "report_date": timestamp,
                        "report_period": to_report_period_type(timestamp),
                        "holding_numbers": item["TOTAL_SHARES"],
                        "holding_ratio": value_to_pct(item["FREESHARES_RATIO"], 0),
                        "holding_values": item["HOLD_VALUE"],
                    }
                )
                actors.append(
                    {
                        "id": actor_id,
                        "entity_id": actor_id,
                        "entity_type": actor_type.value,
                        "exchange": "cn",
                        "code": item["HOLDER_CODE"],
                        "name": f'{item["HOLDER_NAME"]}',
                    }
                )
            df_to_db(
                data_schema=self.data_schema,
                df=pd.DataFrame.from_records(holders),
                provider=self.provider,
                force_update=True,
                drop_duplicates=True,
            )
            # save the actors
            df_to_db(
                data_schema=ActorMeta,
                df=pd.DataFrame.from_records(actors),
                provider=self.provider,
                force_update=False,
                drop_duplicates=True,
            )
def default(self, object):
    """Serialize project-specific types the base JSON encoder cannot handle.

    Falls back to the superclass (which raises TypeError) for anything else.
    """
    if isinstance(object, pd.Series):
        return object.to_dict()
    if isinstance(object, pd.Timestamp):
        return to_time_str(object, fmt=TIME_FORMAT_ISO8601)
    if isinstance(object, Enum):
        return object.value
    if isinstance(object, Bean):
        return object.dict()
    return super().default(object)
def get_tag_domain(self, entity_id, timestamp, **fill_kv):
    """Load (or create) the tag-domain row for entity_id at timestamp's day.

    An existing row gets the fill_kv attributes overwritten; a missing row is
    instantiated (not persisted) with the same values.
    """
    the_date = to_time_str(timestamp, fmt=TIME_FORMAT_DAY)
    the_id = f"{entity_id}_{the_date}"
    the_domain = self.data_schema.get_one(id=the_id)
    if the_domain:
        # Fix: setattr instead of exec(f"the_domain.{k}=v") — same effect,
        # no code-injection surface and no string evaluation.
        for k, v in fill_kv.items():
            setattr(the_domain, k, v)
        return the_domain
    return self.data_schema(id=the_id, entity_id=entity_id,
                            timestamp=to_pd_timestamp(the_date), **fill_kv)
def record(self, entity, start, end, size, timestamps):
    """Persist one holding-summary row per (report date, institutional actor type)."""
    for timestamp in timestamps:
        the_date = to_time_str(timestamp)
        self.logger.info(f'to {entity.code} {the_date}')
        for actor_type in ActorType:
            # the summary endpoint only covers institutional org types
            if actor_type in (ActorType.private_equity, ActorType.individual):
                continue
            result = get_ii_summary(code=entity.code, report_date=the_date,
                                    org_type=actor_type_to_org_type(actor_type))
            if not result:
                continue
            summary_list = []
            for item in result:
                summary_list.append({
                    'id': f'{entity.entity_id}_{the_date}_{actor_type.value}',
                    'entity_id': entity.entity_id,
                    'timestamp': timestamp,
                    'code': entity.code,
                    'name': entity.name,
                    'actor_type': actor_type.value,
                    'actor_count': item['TOTAL_ORG_NUM'],
                    'report_date': timestamp,
                    'report_period': to_report_period_type(timestamp),
                    'change_ratio': value_to_pct(item['CHANGE_RATIO'], default=1),
                    'is_complete': item['IS_COMPLETE'],
                    'holding_numbers': item['TOTAL_FREE_SHARES'],
                    'holding_ratio': value_to_pct(item['TOTAL_SHARES_RATIO'], default=0),
                    'holding_values': item['TOTAL_MARKET_CAP'],
                })
            df_to_db(data_schema=self.data_schema,
                     df=pd.DataFrame.from_records(summary_list),
                     provider=self.provider, force_update=True, drop_duplicates=True)
def get_tag_domains(self, entity_ids, timestamp, **fill_kv):
    """Load tag-domain rows for entity_ids at timestamp's day, creating
    (unpersisted) rows for entities that have none yet.

    fill_kv attributes are applied to existing rows and passed to new ones.
    """
    the_date = to_time_str(timestamp, fmt=TIME_FORMAT_DAY)
    ids = [f"{entity_id}_{the_date}" for entity_id in entity_ids]
    the_domains = self.data_schema.query_data(ids=ids, return_type="domain")
    # robustness: query_data may yield no rows
    if not the_domains:
        the_domains = []
    for the_domain in the_domains:
        # Fix: setattr instead of exec(f"the_domain.{k}=v") — same effect,
        # no code-injection surface.
        for k, v in fill_kv.items():
            setattr(the_domain, k, v)
    current_ids = {item.id for item in the_domains}
    # Fix: the missing-id set previously contained already-composed
    # "<entity_id>_<date>" ids, which were then suffixed with the date AGAIN
    # and stored as entity_id. Build new domains from the original entity_ids.
    new_domains = [
        self.data_schema(id=f"{entity_id}_{the_date}", entity_id=entity_id,
                         timestamp=to_pd_timestamp(the_date), **fill_kv)
        for entity_id in entity_ids
        if f"{entity_id}_{the_date}" not in current_ids
    ]
    return the_domains + new_domains
def record(self, entity, start, end, size, timestamps):
    """Persist one holding-summary row per (report date, institutional actor type)."""
    for timestamp in timestamps:
        the_date = to_time_str(timestamp)
        self.logger.info(f"to {entity.code} {the_date}")
        for actor_type in ActorType:
            # the summary endpoint only covers institutional org types
            if actor_type in (ActorType.private_equity, ActorType.individual):
                continue
            result = get_ii_summary(
                code=entity.code, report_date=the_date,
                org_type=actor_type_to_org_type(actor_type)
            )
            if not result:
                continue
            rows = [
                {
                    "id": f"{entity.entity_id}_{the_date}_{actor_type.value}",
                    "entity_id": entity.entity_id,
                    "timestamp": timestamp,
                    "code": entity.code,
                    "name": entity.name,
                    "actor_type": actor_type.value,
                    "actor_count": item["TOTAL_ORG_NUM"],
                    "report_date": timestamp,
                    "report_period": to_report_period_type(timestamp),
                    "change_ratio": value_to_pct(item["CHANGE_RATIO"], default=1),
                    "is_complete": item["IS_COMPLETE"],
                    "holding_numbers": item["TOTAL_FREE_SHARES"],
                    "holding_ratio": value_to_pct(item["TOTAL_SHARES_RATIO"], default=0),
                    "holding_values": item["TOTAL_MARKET_CAP"],
                }
                for item in result
            ]
            df_to_db(
                data_schema=self.data_schema,
                df=pd.DataFrame.from_records(rows),
                provider=self.provider,
                force_update=True,
                drop_duplicates=True,
            )
def record(self, entity, start, end, size, timestamps):
    """Aggregate per-stock money-flow rows into one index-level row per day.

    The index code selects the stock universe to aggregate: inflow amount
    columns are summed and inflow-rate columns are averaged across members.
    """
    # Shanghai composite
    if entity.code == "000001":
        all_df = StockMoneyFlow.query_data(
            provider=self.provider, start_timestamp=start, filters=[StockMoneyFlow.entity_id.like("stock_sh%")]
        )
    # Shenzhen component
    elif entity.code == "399001":
        all_df = StockMoneyFlow.query_data(
            provider=self.provider, start_timestamp=start, filters=[StockMoneyFlow.entity_id.like("stock_sz%")]
        )
    # ChiNext
    elif entity.code == "399006":
        all_df = StockMoneyFlow.query_data(
            provider=self.provider, start_timestamp=start, filters=[StockMoneyFlow.code.like("300%")]
        )
    # STAR market
    elif entity.code == "000688":
        all_df = StockMoneyFlow.query_data(
            provider=self.provider, start_timestamp=start, filters=[StockMoneyFlow.code.like("688%")]
        )
    else:
        # Fix: an unsupported index code previously fell through and raised
        # NameError on all_df below.
        return None
    if pd_is_not_null(all_df):
        g = all_df.groupby("timestamp")
        for timestamp, df in g:
            se = pd.Series(
                {
                    "id": "{}_{}".format(entity.id, to_time_str(timestamp)),
                    "entity_id": entity.id,
                    "timestamp": timestamp,
                    "code": entity.code,
                    "name": entity.name,
                }
            )
            # amounts: summed over member stocks
            for col in [
                "net_main_inflows",
                "net_huge_inflows",
                "net_big_inflows",
                "net_medium_inflows",
                "net_small_inflows",
            ]:
                se[col] = df[col].sum()
            # rates: simple average over member stocks
            for col in [
                "net_main_inflow_rate",
                "net_huge_inflow_rate",
                "net_big_inflow_rate",
                "net_medium_inflow_rate",
                "net_small_inflow_rate",
            ]:
                se[col] = df[col].sum() / len(df)
            index_df = se.to_frame().T
            self.logger.info(index_df)
            df_to_db(
                df=index_df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update
            )
    return None
def record(self, entity, start, end, size, timestamps):
    """Fetch daily money-flow data from joinquant for one stock, normalize the
    columns to the schema's names and units, and persist the rows.

    Amount columns are scaled by 10000 (payload unit appears to be 10k —
    TODO confirm against joinquant docs); rate columns are converted from
    percentages to fractions.
    """
    if not self.end_timestamp:
        df = get_money_flow(code=to_jq_entity_id(entity), date=to_time_str(start))
    else:
        df = get_money_flow(code=to_jq_entity_id(entity), date=start,
                            end_date=to_time_str(self.end_timestamp))
    df = df.dropna()
    if pd_is_not_null(df):
        df['name'] = entity.name
        # map joinquant column names onto the schema's field names
        df.rename(columns={
            'date': 'timestamp',
            'net_amount_main': 'net_main_inflows',
            'net_pct_main': 'net_main_inflow_rate',
            'net_amount_xl': 'net_huge_inflows',
            'net_pct_xl': 'net_huge_inflow_rate',
            'net_amount_l': 'net_big_inflows',
            'net_pct_l': 'net_big_inflow_rate',
            'net_amount_m': 'net_medium_inflows',
            'net_pct_m': 'net_medium_inflow_rate',
            'net_amount_s': 'net_small_inflows',
            'net_pct_s': 'net_small_inflow_rate'
        }, inplace=True)
        # convert to standard floats
        inflows_cols = [
            'net_main_inflows', 'net_huge_inflows', 'net_big_inflows',
            'net_medium_inflows', 'net_small_inflows'
        ]
        for col in inflows_cols:
            df[col] = pd.to_numeric(df[col], errors='coerce')
        # rows that failed numeric conversion are dropped; bail if nothing is left
        df = df.dropna()
        if not pd_is_not_null(df):
            return None
        df[inflows_cols] = df[inflows_cols].apply(lambda x: x * 10000)
        inflow_rate_cols = [
            'net_main_inflow_rate', 'net_huge_inflow_rate', 'net_big_inflow_rate',
            'net_medium_inflow_rate', 'net_small_inflow_rate'
        ]
        for col in inflow_rate_cols:
            df[col] = pd.to_numeric(df[col], errors='coerce')
        df = df.dropna()
        if not pd_is_not_null(df):
            return None
        # percentages -> fractions
        df[inflow_rate_cols] = df[inflow_rate_cols].apply(
            lambda x: x / 100)
        # compute total net inflow from the four size buckets
        # (net_main_inflows is excluded — presumably it overlaps huge+big; verify)
        df['net_inflows'] = df['net_huge_inflows'] + df[
            'net_big_inflows'] + df['net_medium_inflows'] + df[
            'net_small_inflows']
        # compute total inflow rate via the turnover implied by main inflow/rate
        # NOTE(review): a zero net_main_inflow_rate yields inf here — confirm
        # upstream data excludes that case
        amount = df['net_main_inflows'] / df['net_main_inflow_rate']
        df['net_inflow_rate'] = df['net_inflows'] / amount
        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['provider'] = 'joinquant'
        df['code'] = entity.code

        def generate_kdata_id(se):
            # one row id per entity per day
            return "{}_{}".format(
                se['entity_id'], to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))

        df['id'] = df[['entity_id',
                       'timestamp']].apply(generate_kdata_id, axis=1)
        df = df.drop_duplicates(subset='id', keep='last')
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)
    return None
def record(self, entity, start, end, size, timestamps):
    """Fetch OHLCV candles from a ccxt exchange and persist them as kdata rows."""
    ccxt_exchange = get_coin_exchange(entity.exchange)
    if not ccxt_exchange.has['fetchOHLCV']:
        self.logger.warning("exchange:{} not support fetchOHLCV".format(
            entity.exchange))
        return
    config = get_exchange_config(entity.exchange)
    # cap the request at the exchange's per-call candle limit
    limit = min(size, config['kdata_limit'])
    kdata_list = []
    if config['support_since'] and start:
        kdatas = ccxt_exchange.fetch_ohlcv(
            entity.code,
            timeframe=self.ccxt_trading_level,
            since=int(start.timestamp() * 1000))
    else:
        kdatas = ccxt_exchange.fetch_ohlcv(
            entity.code,
            timeframe=self.ccxt_trading_level,
            limit=limit)
    for kdata in kdatas:
        # kdata[0] is the candle open time in epoch milliseconds (ccxt convention)
        current_timestamp = kdata[0]
        if self.level == IntervalLevel.LEVEL_1DAY:
            current_timestamp = to_time_str(current_timestamp)
        # Fix: fmt=... was previously passed to str.format, which silently
        # ignores unknown keyword arguments, so the timestamp was never
        # formatted. Format it via to_time_str as intended.
        if self.level >= IntervalLevel.LEVEL_1DAY:
            kdata_id = "{}_{}".format(
                entity.id, to_time_str(current_timestamp, fmt=TIME_FORMAT_DAY))
        else:
            kdata_id = "{}_{}".format(
                entity.id, to_time_str(current_timestamp, fmt=TIME_FORMAT_ISO8601))
        kdata_list.append({
            'id': kdata_id,
            'entity_id': entity.id,
            'code': entity.code,
            'name': entity.name,
            'timestamp': to_pd_timestamp(current_timestamp),
            'open': kdata[1],
            'high': kdata[2],
            'low': kdata[3],
            'close': kdata[4],
            'volume': kdata[5],
            'provider': 'ccxt',
            'level': self.level.value
        })
    if kdata_list:
        df = pd.DataFrame.from_records(kdata_list)
        df_to_db(data_schema=self.data_schema, df=df, provider=self.provider,
                 force_update=True)
def generate_kdata_id(se):
    """Build a kdata row id from the row's entity_id and day-formatted timestamp."""
    day = to_time_str(se["timestamp"], fmt=TIME_FORMAT_DAY)
    return f'{se["entity_id"]}_{day}'
def record(self, entity, start, end, size, timestamps):
    """Fetch daily money-flow data from joinquant for one stock, normalize the
    columns to the schema's names and units, and persist the rows.

    Amount columns are scaled by 10000 (payload unit appears to be 10k —
    TODO confirm against joinquant docs); rate columns are converted from
    percentages to fractions.
    """
    if not self.end_timestamp:
        df = get_money_flow(code=to_jq_entity_id(entity), date=to_time_str(start))
    else:
        df = get_money_flow(code=to_jq_entity_id(entity), date=start,
                            end_date=to_time_str(self.end_timestamp))
    df = df.dropna()
    if pd_is_not_null(df):
        df["name"] = entity.name
        # map joinquant column names onto the schema's field names
        df.rename(
            columns={
                "date": "timestamp",
                "net_amount_main": "net_main_inflows",
                "net_pct_main": "net_main_inflow_rate",
                "net_amount_xl": "net_huge_inflows",
                "net_pct_xl": "net_huge_inflow_rate",
                "net_amount_l": "net_big_inflows",
                "net_pct_l": "net_big_inflow_rate",
                "net_amount_m": "net_medium_inflows",
                "net_pct_m": "net_medium_inflow_rate",
                "net_amount_s": "net_small_inflows",
                "net_pct_s": "net_small_inflow_rate",
            },
            inplace=True,
        )
        # convert to standard floats
        inflows_cols = [
            "net_main_inflows",
            "net_huge_inflows",
            "net_big_inflows",
            "net_medium_inflows",
            "net_small_inflows",
        ]
        for col in inflows_cols:
            df[col] = pd.to_numeric(df[col], errors="coerce")
        # rows that failed numeric conversion are dropped; bail if nothing is left
        df = df.dropna()
        if not pd_is_not_null(df):
            return None
        df[inflows_cols] = df[inflows_cols].apply(lambda x: x * 10000)
        inflow_rate_cols = [
            "net_main_inflow_rate",
            "net_huge_inflow_rate",
            "net_big_inflow_rate",
            "net_medium_inflow_rate",
            "net_small_inflow_rate",
        ]
        for col in inflow_rate_cols:
            df[col] = pd.to_numeric(df[col], errors="coerce")
        df = df.dropna()
        if not pd_is_not_null(df):
            return None
        # percentages -> fractions
        df[inflow_rate_cols] = df[inflow_rate_cols].apply(
            lambda x: x / 100)
        # compute total net inflow from the four size buckets
        # (net_main_inflows is excluded — presumably it overlaps huge+big; verify)
        df["net_inflows"] = (df["net_huge_inflows"] + df["net_big_inflows"] +
                             df["net_medium_inflows"] + df["net_small_inflows"])
        # compute total inflow rate via the turnover implied by main inflow/rate
        # NOTE(review): a zero net_main_inflow_rate yields inf here — confirm
        # upstream data excludes that case
        amount = df["net_main_inflows"] / df["net_main_inflow_rate"]
        df["net_inflow_rate"] = df["net_inflows"] / amount
        df["entity_id"] = entity.id
        df["timestamp"] = pd.to_datetime(df["timestamp"])
        df["provider"] = "joinquant"
        df["code"] = entity.code

        def generate_kdata_id(se):
            # one row id per entity per day
            return "{}_{}".format(
                se["entity_id"], to_time_str(se["timestamp"], fmt=TIME_FORMAT_DAY))

        df["id"] = df[["entity_id",
                       "timestamp"]].apply(generate_kdata_id, axis=1)
        df = df.drop_duplicates(subset="id", keep="last")
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)
    return None
def record(self, entity, start, end, size, timestamps):
    """Persist the top holders of the stock per report date, registering any
    actors (institutions by code, individuals by name) seen along the way."""
    for timestamp in timestamps:
        the_date = to_time_str(timestamp)
        result = get_holders(code=entity.code, end_date=the_date)
        if not result:
            continue
        holders = []
        new_actors = []
        for item in result:
            if item["IS_HOLDORG"] == "1":
                # institutional holder: reuse the stored ActorMeta when present
                domains: List[ActorMeta] = ActorMeta.query_data(
                    filters=[ActorMeta.code == item["HOLDER_CODE"]], return_type="domain"
                )
                if domains:
                    actor = domains[0]
                else:
                    actor_type = ActorType.corporation.value
                    actor = ActorMeta(
                        entity_id=f'{actor_type}_cn_{item["HOLDER_CODE"]}',
                        id=f'{actor_type}_cn_{item["HOLDER_CODE"]}',
                        entity_type=actor_type,
                        exchange="cn",
                        code=item["HOLDER_CODE"],
                        name=item["HOLDER_NAME"],
                    )
            else:
                # individual holder: keyed by name (no holder code available)
                actor_type = ActorType.individual.value
                actor = ActorMeta(
                    entity_id=f'{actor_type}_cn_{item["HOLDER_NAME"]}',
                    id=f'{actor_type}_cn_{item["HOLDER_NAME"]}',
                    entity_type=actor_type,
                    exchange="cn",
                    code=item["HOLDER_NAME"],
                    name=item["HOLDER_NAME"],
                )
            new_actors.append(actor.__dict__)
            holders.append(
                {
                    "id": f"{entity.entity_id}_{the_date}_{actor.entity_id}",
                    "entity_id": entity.entity_id,
                    "timestamp": timestamp,
                    "code": entity.code,
                    "name": entity.name,
                    "actor_id": actor.entity_id,
                    "actor_type": actor.entity_type,
                    "actor_code": actor.code,
                    "actor_name": actor.name,
                    "report_date": timestamp,
                    "report_period": to_report_period_type(timestamp),
                    "holding_numbers": item["HOLD_NUM"],
                    "holding_ratio": value_to_pct(item["HOLD_NUM_RATIO"], default=0),
                }
            )
        if holders:
            df_to_db(
                data_schema=self.data_schema,
                df=pd.DataFrame.from_records(holders),
                provider=self.provider,
                force_update=True,
            )
        if new_actors:
            df_to_db(
                data_schema=ActorMeta,
                df=pd.DataFrame.from_records(new_actors),
                provider=self.provider,
                force_update=False,
            )
def record(self, entity, start, end, size, timestamps):
    """Persist the free-float top holders per report date and register any
    newly seen actors."""
    for timestamp in timestamps:
        the_date = to_time_str(timestamp)
        result = get_free_holders(code=entity.code, end_date=the_date)
        if not result:
            continue
        # sample item:
        # {'END_DATE': '2021-03-31 00:00:00',
        #  'FREE_HOLDNUM_RATIO': 0.631949916991,
        #  'FREE_RATIO_QOQ': '-5.33046217',
        #  'HOLDER_CODE': '161606',
        #  'HOLDER_CODE_OLD': '161606',
        #  'HOLDER_NAME': '交通银行-融通行业景气证券投资基金',
        #  'HOLDER_RANK': 10,
        #  'HOLD_NUM': 39100990,
        #  'IS_HOLDORG': '1',
        #  'SECUCODE': '000338.SZ'}
        holders = []
        new_actors = []
        for item in result:
            if item['IS_HOLDORG'] == '1':
                # institutional holder: reuse the stored ActorMeta when present
                domains: List[ActorMeta] = ActorMeta.query_data(
                    filters=[ActorMeta.code == item['HOLDER_CODE']], return_type='domain')
                if domains:
                    actor = domains[0]
                else:
                    actor_type = ActorType.corporation.value
                    actor = ActorMeta(entity_id=f'{actor_type}_cn_{item["HOLDER_CODE"]}',
                                      id=f'{actor_type}_cn_{item["HOLDER_CODE"]}',
                                      entity_type=actor_type,
                                      exchange='cn',
                                      code=item["HOLDER_CODE"],
                                      name=item["HOLDER_NAME"])
            else:
                # individual holder: keyed by name (no holder code available)
                actor_type = ActorType.individual.value
                actor = ActorMeta(entity_id=f'{actor_type}_cn_{item["HOLDER_NAME"]}',
                                  id=f'{actor_type}_cn_{item["HOLDER_NAME"]}',
                                  entity_type=actor_type,
                                  exchange='cn',
                                  code=item["HOLDER_NAME"],
                                  name=item["HOLDER_NAME"])
            new_actors.append(actor.__dict__)
            holders.append({'id': f'{entity.entity_id}_{the_date}_{actor.entity_id}',
                            'entity_id': entity.entity_id,
                            'timestamp': timestamp,
                            'code': entity.code,
                            'name': entity.name,
                            'actor_id': actor.entity_id,
                            'actor_type': actor.entity_type,
                            'actor_code': actor.code,
                            'actor_name': actor.name,
                            'report_date': timestamp,
                            'report_period': to_report_period_type(timestamp),
                            'holding_numbers': item['HOLD_NUM'],
                            # payload value is stored as-is (no pct conversion here)
                            'holding_ratio': item['FREE_HOLDNUM_RATIO']})
        if holders:
            df_to_db(data_schema=self.data_schema, df=pd.DataFrame.from_records(holders),
                     provider=self.provider, force_update=True)
        if new_actors:
            df_to_db(data_schema=ActorMeta, df=pd.DataFrame.from_records(new_actors),
                     provider=self.provider, force_update=False)
def record(self, entity, start, end, size, timestamps):
    """Persist per-institution holdings for each report date, and register the
    holding institutions as ActorMeta rows."""
    for timestamp in timestamps:
        the_date = to_time_str(timestamp)
        self.logger.info(f'to {entity.code} {the_date}')
        for actor_type in ActorType:
            # these two types are not covered by the institutional-holder report
            if actor_type in (ActorType.private_equity, ActorType.individual):
                continue
            result = get_ii_holder(code=entity.code, report_date=the_date,
                                   org_type=actor_type_to_org_type(actor_type))
            if not result:
                continue
            holders = []
            actors = []
            for item in result:
                actor_id = f'{actor_type.value}_cn_{item["HOLDER_CODE"]}'
                holders.append({
                    'id': f'{entity.entity_id}_{the_date}_{actor_id}',
                    'entity_id': entity.entity_id,
                    'timestamp': timestamp,
                    'code': entity.code,
                    'name': entity.name,
                    'actor_id': actor_id,
                    'actor_type': actor_type.value,
                    'actor_code': item["HOLDER_CODE"],
                    'actor_name': f'{item["HOLDER_NAME"]}',
                    'report_date': timestamp,
                    'report_period': to_report_period_type(timestamp),
                    'holding_numbers': item['TOTAL_SHARES'],
                    'holding_ratio': value_to_pct(item['FREESHARES_RATIO'], 0),
                    'holding_values': item['HOLD_VALUE'],
                })
                actors.append({
                    'id': actor_id,
                    'entity_id': actor_id,
                    'entity_type': actor_type.value,
                    'exchange': 'cn',
                    'code': item["HOLDER_CODE"],
                    'name': f'{item["HOLDER_NAME"]}',
                })
            df_to_db(data_schema=self.data_schema,
                     df=pd.DataFrame.from_records(holders),
                     provider=self.provider, force_update=True, drop_duplicates=True)
            # save the actors
            df_to_db(data_schema=ActorMeta,
                     df=pd.DataFrame.from_records(actors),
                     provider=self.provider, force_update=False, drop_duplicates=True)