def record(self, entity, start, end, size, timestamps): q = query(finance.FUND_PORTFOLIO_STOCK).filter(finance.FUND_PORTFOLIO_STOCK.pub_date >= start).filter( finance.FUND_PORTFOLIO_STOCK.code == entity.code) df = finance.run_query(q) if pd_is_not_null(df): # id code period_start period_end pub_date report_type_id report_type rank symbol name shares market_cap proportion # 0 8640569 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 1 601318 中国平安 19869239.0 1.361043e+09 7.09 # 1 8640570 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 2 600519 贵州茅台 921670.0 6.728191e+08 3.50 # 2 8640571 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 3 600036 招商银行 18918815.0 5.806184e+08 3.02 # 3 8640572 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 4 601166 兴业银行 22862332.0 3.646542e+08 1.90 df['timestamp'] = pd.to_datetime(df['pub_date']) df.rename(columns={'symbol': 'stock_code', 'name': 'stock_name'}, inplace=True) df['proportion'] = df['proportion'] * 0.01 df = portfolio_relate_stock(df, entity) df['stock_id'] = df['stock_code'].apply(lambda x: china_stock_code_to_id(x)) df['id'] = df[['entity_id', 'stock_id', 'pub_date', 'id']].apply(lambda x: '_'.join(x.astype(str)), axis=1) df['report_date'] = pd.to_datetime(df['period_end']) df['report_period'] = df['report_type'].apply(lambda x: jq_to_report_period(x)) df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update) # self.logger.info(df.tail()) self.logger.info(f"persist etf {entity.code} portfolio success") return None
def format(self, entity, df): # id code period_start period_end pub_date report_type_id report_type rank symbol name shares market_cap proportion # 0 8640569 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 1 601318 中国平安 19869239.0 1.361043e+09 7.09 # 1 8640570 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 2 600519 贵州茅台 921670.0 6.728191e+08 3.50 # 2 8640571 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 3 600036 招商银行 18918815.0 5.806184e+08 3.02 # 3 8640572 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 4 601166 兴业银行 22862332.0 3.646542e+08 1.90 if 'timestamp' not in df.columns: df['timestamp'] = pd.to_datetime(df[self.get_original_time_field()]) elif not isinstance(df['timestamp'].dtypes, datetime): df['timestamp'] = pd.to_datetime(df['timestamp']) df.rename(columns={'symbol': 'stock_code', 'name': 'stock_name'}, inplace=True) df['proportion'] *= 0.01 df['stock_id'] = df['stock_code'].apply(lambda x: china_stock_code_to_id(x)) df['report_date'] = pd.to_datetime(df['period_end']) df['report_period'] = df['report_type'].apply(lambda x: jq_to_report_period(x)) df['entity_id'] = entity.id df['provider'] = self.provider.value df['entity_type'] = entity.entity_type df['exchange'] = entity.exchange df['code'] = entity.code df['name'] = entity.name df['id'] = self.generate_domain_id(entity, df) return df
def record(self, entity, start, end, size, timestamps): df = run_query( table="finance.FUND_PORTFOLIO_STOCK", conditions=f"pub_date#>=#{to_time_str(start)}&code#=#{entity.code}", parse_dates=None, ) if pd_is_not_null(df): # id code period_start period_end pub_date report_type_id report_type rank symbol name shares market_cap proportion # 0 8640569 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 1 601318 中国平安 19869239.0 1.361043e+09 7.09 # 1 8640570 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 2 600519 贵州茅台 921670.0 6.728191e+08 3.50 # 2 8640571 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 3 600036 招商银行 18918815.0 5.806184e+08 3.02 # 3 8640572 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 4 601166 兴业银行 22862332.0 3.646542e+08 1.90 df["timestamp"] = pd.to_datetime(df["pub_date"]) df.rename(columns={ "symbol": "stock_code", "name": "stock_name" }, inplace=True) df["proportion"] = df["proportion"] * 0.01 df = portfolio_relate_stock(df, entity) df["stock_id"] = df["stock_code"].apply( lambda x: china_stock_code_to_id(x)) df["id"] = df[["entity_id", "stock_id", "pub_date", "id"]].apply(lambda x: "_".join(x.astype(str)), axis=1) df["report_date"] = pd.to_datetime(df["period_end"]) df["report_period"] = df["report_type"].apply( lambda x: jq_to_report_period(x)) df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update) # self.logger.info(df.tail()) self.logger.info( f"persist etf {entity.code} portfolio success {df.iloc[-1]['pub_date']}" ) return None
def record(self, entity, start, end, size, timestamps): # 忽略退市的 if entity.end_date: return None redundant_times = 1 while redundant_times > 0: df = run_query( table='finance.FUND_PORTFOLIO_STOCK', conditions= f'pub_date#>=#{to_time_str(start)}&code#=#{entity.code}', parse_dates=None) df = df.dropna() if pd_is_not_null(df): # data format # id code period_start period_end pub_date report_type_id report_type rank symbol name shares market_cap proportion # 0 8640569 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 1 601318 中国平安 19869239.0 1.361043e+09 7.09 # 1 8640570 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 2 600519 贵州茅台 921670.0 6.728191e+08 3.50 # 2 8640571 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 3 600036 招商银行 18918815.0 5.806184e+08 3.02 # 3 8640572 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 4 601166 兴业银行 22862332.0 3.646542e+08 1.90 df['timestamp'] = pd.to_datetime(df['pub_date']) df.rename(columns={ 'symbol': 'stock_code', 'name': 'stock_name' }, inplace=True) df['proportion'] = df['proportion'] * 0.01 df = portfolio_relate_stock(df, entity) df['stock_id'] = df['stock_code'].apply( lambda x: china_stock_code_to_id(x)) df['id'] = df[['entity_id', 'stock_id', 'pub_date', 'id']].apply(lambda x: '_'.join(x.astype(str)), axis=1) df['report_date'] = pd.to_datetime(df['period_end']) df['report_period'] = df['report_type'].apply( lambda x: jq_to_report_period(x)) saved = df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update) # 取不到非重复的数据 if saved == 0: return None # self.logger.info(df.tail()) self.logger.info( f"persist fund {entity.code}({entity.name}) portfolio success {df.iloc[-1]['pub_date']}" ) latest = df['timestamp'].max() # 取到了最近两年的数据,再请求一次,确保取完最新的数据 if latest.year >= now_pd_timestamp().year - 1: redundant_times = redundant_times - 1 start = latest else: return None return None