def record(self, entity, start, end, size, timestamps):
    """Fetch exchange-wide margin-trading totals since `start` and map them
    to result dicts for persistence by the caller.
    """
    exchange_code = code_map_jq.get(entity.code)
    df = run_query(
        table='finance.STK_MT_TOTAL',
        conditions=f'exchange_code#=#{exchange_code}&date#>=#{to_time_str(start)}',
        parse_dates=['date'])
    print(df)
    json_results = [
        {
            'provider': self.provider,
            'timestamp': row['date'],
            'name': entity.name,
            'margin_value': row['fin_value'],
            'margin_buy': row['fin_buy_value'],
            'short_value': row['sec_value'],
            'short_volume': row['sec_sell_volume'],
            'total_value': row['fin_sec_value']
        }
        for row in df.to_dict(orient='records')
    ]
    # A short page (fewer than 100 rows) means we reached the end of the data.
    if len(json_results) < 100:
        self.one_shot = True
    return json_results
def record(self, entity, start, end, size, timestamps):
    """Persist HK-connect holder information, issuing one query per day in
    `timestamps` and writing any non-empty result to the database.
    """
    column_map = {
        "day": "timestamp",
        "link_id": "holder_code",
        "link_name": "holder_name"
    }
    for day in timestamps:
        df = run_query(
            table="finance.STK_HK_HOLD_INFO",
            conditions=f"link_id#=#{entity.code}&day#=#{to_time_str(day)}")
        print(df)
        if not pd_is_not_null(df):
            continue

        df.rename(columns=column_map, inplace=True)
        df["timestamp"] = pd.to_datetime(df["timestamp"])
        # entity_id must be derived before the exchange suffix is stripped below.
        df["entity_id"] = df["code"].apply(
            lambda jq_code: to_entity_id(entity_type="stock", jq_code=jq_code))
        df["code"] = df["code"].apply(lambda jq_code: jq_code.split(".")[0])

        def make_id(row):
            # id format: {holder_name}_{entity_id}_{timestamp}
            day_str = to_time_str(row["timestamp"], fmt=TIME_FORMAT_DAY)
            return "{}_{}_{}".format(row["holder_name"], row["entity_id"], day_str)

        df["id"] = df.apply(make_id, axis=1)
        df_to_db(df=df,
                 data_schema=self.data_schema,
                 provider=self.provider,
                 force_update=self.force_update)
def record(self, entity, start, end, size, timestamps):
    """Fetch and persist this ETF's disclosed stock portfolio published since `start`.

    Queries finance.FUND_PORTFOLIO_STOCK for the entity's code, normalizes the
    columns to the local data schema, and writes the rows via df_to_db.
    Persistence is the side effect; always returns None.
    """
    df = run_query(table='finance.FUND_PORTFOLIO_STOCK',
                   conditions=f'pub_date#>=#{to_time_str(start)}&code#=#{entity.code}',
                   parse_dates=None)
    if pd_is_not_null(df):
        # Sample of the raw result:
        # id code period_start period_end pub_date report_type_id report_type rank symbol name shares market_cap proportion
        # 0 8640569 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 1 601318 中国平安 19869239.0 1.361043e+09 7.09
        # 1 8640570 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 2 600519 贵州茅台 921670.0 6.728191e+08 3.50
        # 2 8640571 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 3 600036 招商银行 18918815.0 5.806184e+08 3.02
        # 3 8640572 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 4 601166 兴业银行 22862332.0 3.646542e+08 1.90
        df['timestamp'] = pd.to_datetime(df['pub_date'])
        df.rename(columns={'symbol': 'stock_code', 'name': 'stock_name'}, inplace=True)
        # proportion arrives as a percentage; store it as a fraction.
        df['proportion'] = df['proportion'] * 0.01
        df = portfolio_relate_stock(df, entity)
        df['stock_id'] = df['stock_code'].apply(lambda x: china_stock_code_to_id(x))
        # Row id is the join of entity_id, stock_id, pub_date and the upstream id,
        # so re-runs of the same disclosure overwrite rather than duplicate.
        df['id'] = df[['entity_id', 'stock_id', 'pub_date', 'id']].apply(
            lambda x: '_'.join(x.astype(str)), axis=1)
        df['report_date'] = pd.to_datetime(df['period_end'])
        df['report_period'] = df['report_type'].apply(lambda x: jq_to_report_period(x))
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)
        # self.logger.info(df.tail())
        self.logger.info(
            f"persist etf {entity.code} portfolio success {df.iloc[-1]['pub_date']}")
    return None
def record(self, entity, start, end, size, timestamps):
    """Record HK-connect shareholding data for each requested timestamp and
    persist any non-empty query result.
    """
    for ts in timestamps:
        condition = f'link_id#=#{entity.code}&day#=#{to_time_str(ts)}'
        df = run_query(table='finance.STK_HK_HOLD_INFO', conditions=condition)
        print(df)
        if pd_is_not_null(df):
            df = df.rename(columns={
                'day': 'timestamp',
                'link_id': 'holder_code',
                'link_name': 'holder_name'
            })
            df['timestamp'] = pd.to_datetime(df['timestamp'])
            # Derive entity_id from the full jq code first; the bare code
            # (exchange suffix stripped) is computed afterwards.
            df['entity_id'] = [
                to_entity_id(entity_type='stock', jq_code=c) for c in df['code']
            ]
            df['code'] = [c.split('.')[0] for c in df['code']]
            # id format: {holder_name}_{entity_id}_{timestamp}
            df['id'] = df.apply(
                lambda row: '{}_{}_{}'.format(
                    row['holder_name'], row['entity_id'],
                    to_time_str(row['timestamp'], fmt=TIME_FORMAT_DAY)),
                axis=1)
            df_to_db(df=df,
                     data_schema=self.data_schema,
                     provider=self.provider,
                     force_update=self.force_update)
def record(self, entity, start, end, size, timestamps):
    """Fetch daily trade-link quota usage for this entity since `start` and
    return the rows as result dicts.
    """
    df = run_query(
        table='finance.STK_ML_QUOTA',
        conditions=f'link_id#=#{entity.code}&day#>=#{to_time_str(start)}')
    print(df)
    # Upstream amounts are reported in units of 100 million (亿); scale to raw units.
    scale = 100000000
    json_results = []
    for row in df.to_dict(orient='records'):
        json_results.append({
            'provider': self.provider,
            'timestamp': row['day'],
            'name': entity.name,
            'buy_amount': multiple_number(row['buy_amount'], scale),
            'buy_volume': row['buy_volume'],
            'sell_amount': multiple_number(row['sell_amount'], scale),
            'sell_volume': row['sell_volume'],
            'quota_daily': multiple_number(row['quota_daily'], scale),
            'quota_daily_balance': multiple_number(row['quota_daily_balance'], scale)
        })
    # Fewer than one full page of results: nothing more to fetch.
    if len(json_results) < 100:
        self.one_shot = True
    return json_results
def record(self, entity, start, end, size, timestamps):
    """Fetch exchange-wide margin-trading totals since `start` and return
    them as result dicts.
    """
    code = code_map_jq.get(entity.code)
    df = run_query(
        table="finance.STK_MT_TOTAL",
        conditions=f"exchange_code#=#{code}&date#>=#{to_time_str(start)}",
        parse_dates=["date"],
    )
    print(df)
    json_results = []
    for row in df.to_dict(orient="records"):
        json_results.append({
            "provider": self.provider,
            "timestamp": row["date"],
            "name": entity.name,
            "margin_value": row["fin_value"],
            "margin_buy": row["fin_buy_value"],
            "short_value": row["sec_value"],
            "short_volume": row["sec_sell_volume"],
            "total_value": row["fin_sec_value"],
        })
    # A short page (fewer than 100 rows) means the data source is exhausted.
    if len(json_results) < 100:
        self.one_shot = True
    return json_results
def record(self, entity, start, end, size, timestamps):
    """Fetch daily trade-link quota usage for this entity since `start` and
    return the rows as result dicts.
    """
    df = run_query(
        table="finance.STK_ML_QUOTA",
        conditions=f"link_id#=#{entity.code}&day#>=#{to_time_str(start)}")
    print(df)

    def to_result(item):
        # Amounts are reported in units of 100 million; volumes are raw.
        return {
            "provider": self.provider,
            "timestamp": item["day"],
            "name": entity.name,
            "buy_amount": multiple_number(item["buy_amount"], 100000000),
            "buy_volume": item["buy_volume"],
            "sell_amount": multiple_number(item["sell_amount"], 100000000),
            "sell_volume": item["sell_volume"],
            "quota_daily": multiple_number(item["quota_daily"], 100000000),
            "quota_daily_balance": multiple_number(item["quota_daily_balance"], 100000000),
        }

    json_results = [to_result(item) for item in df.to_dict(orient="records")]
    # Fewer than one full page: nothing more to fetch.
    if len(json_results) < 100:
        self.one_shot = True
    return json_results
def run(self):
    """Crawl fund listings from finance.FUND_MAIN_INFO, one operate mode at a
    time, paging forward by start_date windows until today's date is reached.
    """
    # Crawl by fund operate mode.
    # code    operate mode
    # 401001  open-end fund
    # 401002  closed-end fund
    # 401003  QDII
    # 401004  FOF
    # 401005  ETF
    # 401006  LOF
    for operate_mode_id in (401001, 401002, 401005):
        # Window width in years; grows when a window returns too little data.
        year_count = 2
        while True:
            # Resume from the latest fund already stored for this operate mode.
            latest = Fund.query_data(
                filters=[Fund.operate_mode_id == operate_mode_id],
                order=Fund.timestamp.desc(),
                limit=1,
                return_type="domain",
            )
            start_timestamp = "2000-01-01"
            if latest:
                start_timestamp = latest[0].timestamp
            # Never query past "now".
            end_timestamp = min(
                next_date(start_timestamp, 365 * year_count), now_pd_timestamp())
            df = run_query(
                table="finance.FUND_MAIN_INFO",
                conditions=
                f"operate_mode_id#=#{operate_mode_id}&start_date#>=#{to_time_str(start_timestamp)}&start_date#<=#{to_time_str(end_timestamp)}",
                parse_dates=["start_date", "end_date"],
                dtype={"main_code": str},
            )
            # Empty or short window: widen the next query window by one year.
            if not pd_is_not_null(df) or (df["start_date"].max().year < end_timestamp.year):
                year_count = year_count + 1
            if pd_is_not_null(df):
                df.rename(columns={"start_date": "timestamp"}, inplace=True)
                df["timestamp"] = pd.to_datetime(df["timestamp"])
                df["list_date"] = df["timestamp"]
                df["end_date"] = pd.to_datetime(df["end_date"])
                df["code"] = df["main_code"]
                df["entity_id"] = df["code"].apply(
                    lambda x: to_entity_id(entity_type="fund", jq_code=x))
                df["id"] = df["entity_id"]
                df["entity_type"] = "fund"
                # NOTE(review): exchange is hard-coded to "sz" for all funds here —
                # confirm this is intended for SSE-listed funds as well.
                df["exchange"] = "sz"
                df_to_db(df, data_schema=Fund, provider=self.provider,
                         force_update=self.force_update)
                self.logger.info(
                    f"persist fund {operate_mode_id} list success {start_timestamp} to {end_timestamp}"
                )
            # Stop once the window has reached today.
            if is_same_date(end_timestamp, now_pd_timestamp()):
                break
def run(self):
    """Crawl fund listings from finance.FUND_MAIN_INFO per operate mode,
    advancing a start_date window until it reaches the current date.
    """
    # Crawl by fund operate mode.
    # code    operate mode
    # 401001  open-end fund
    # 401002  closed-end fund
    # 401003  QDII
    # 401004  FOF
    # 401005  ETF
    # 401006  LOF
    for operate_mode_id in (401001, 401002, 401005):
        # Window width in years; widened when a window yields too little data.
        year_count = 2
        while True:
            # Resume from the latest stored fund of this operate mode.
            latest = Fund.query_data(
                filters=[Fund.operate_mode_id == operate_mode_id],
                order=Fund.timestamp.desc(),
                limit=1,
                return_type='domain')
            start_timestamp = '2000-01-01'
            if latest:
                start_timestamp = latest[0].timestamp
            # Never query past "now".
            end_timestamp = min(
                next_date(start_timestamp, 365 * year_count), now_pd_timestamp())
            df = run_query(
                table='finance.FUND_MAIN_INFO',
                conditions=
                f'operate_mode_id#=#{operate_mode_id}&start_date#>=#{to_time_str(start_timestamp)}&start_date#<=#{to_time_str(end_timestamp)}',
                parse_dates=['start_date', 'end_date'],
                dtype={'main_code': str})
            # Empty or short window: widen the next query window by one year.
            if not pd_is_not_null(df) or (df['start_date'].max().year < end_timestamp.year):
                year_count = year_count + 1
            if pd_is_not_null(df):
                df.rename(columns={'start_date': 'timestamp'}, inplace=True)
                df['timestamp'] = pd.to_datetime(df['timestamp'])
                df['list_date'] = df['timestamp']
                df['end_date'] = pd.to_datetime(df['end_date'])
                df['code'] = df['main_code']
                df['entity_id'] = df['code'].apply(
                    lambda x: to_entity_id(entity_type='fund', jq_code=x))
                df['id'] = df['entity_id']
                df['entity_type'] = 'fund'
                # NOTE(review): exchange is hard-coded to 'sz' for every fund —
                # confirm this is intended for SSE-listed funds as well.
                df['exchange'] = 'sz'
                df_to_db(df, data_schema=Fund, provider=self.provider,
                         force_update=self.force_update)
                self.logger.info(
                    f'persist fund {operate_mode_id} list success {start_timestamp} to {end_timestamp}'
                )
            # Stop once the window has reached today.
            if is_same_date(end_timestamp, now_pd_timestamp()):
                break
# Sentinel distinguishing "argument omitted" from an explicitly passed None,
# so callers that pass parse_dates=None keep their original behavior.
_JQ_QUERY_UNSET = object()


def jq_run_query(table='finance.STK_EXCHANGE_TRADE_INFO',
                 columns=None,
                 conditions=None,
                 count=1000,
                 dtype=_JQ_QUERY_UNSET,
                 parse_dates=_JQ_QUERY_UNSET):
    """Thin wrapper around jq.run_query that logs failures and returns None.

    :param table: jqdata table to query
    :param columns: columns to select, or None for all
    :param conditions: '#'-delimited condition string understood by jqdata
    :param count: maximum number of rows to return
    :param dtype: column dtype mapping; defaults to {'code': str, 'symbol': str}
    :param parse_dates: columns parsed as dates; defaults to ['day', 'pub_date']
    :return: the query result DataFrame, or None if the query raised
    """
    # Build default mutable arguments fresh on every call (never share a
    # mutable default dict/list across calls).
    if dtype is _JQ_QUERY_UNSET:
        dtype = {'code': str, 'symbol': str}
    if parse_dates is _JQ_QUERY_UNSET:
        parse_dates = ['day', 'pub_date']
    try:
        return jq.run_query(table=table,
                            columns=columns,
                            conditions=conditions,
                            count=count,
                            dtype=dtype,
                            parse_dates=parse_dates)
    except Exception as e:
        # Use a safe lookup: the original dtype["code"] raised KeyError inside
        # the handler whenever a caller passed a dtype without a 'code' entry.
        code_dtype = dtype.get('code') if isinstance(dtype, dict) else None
        logger.error(f'jq_run_query, code: {code_dtype}, error: {e}')
        return None
def record(self, entity, start, end, size, timestamps):
    """Fetch daily exchange trade summary rows since `start` and map each row
    to a result dict.
    """
    exchange = code_map_jq.get(entity.code)
    df = run_query(
        table="finance.STK_EXCHANGE_TRADE_INFO",
        conditions=f"exchange_code#=#{exchange}&date#>=#{to_time_str(start)}",
        parse_dates=["date"],
    )
    print(df)
    # Market caps / turnover are reported in units of 100 million; volume in 10k.
    hundred_million = 100000000
    json_results = []
    for row in df.to_dict(orient="records"):
        result = {
            "provider": self.provider,
            "timestamp": row["date"],
            "name": entity.name,
            "pe": row["pe_average"],
            "total_value": multiple_number(row["total_market_cap"], hundred_million),
            # NOTE(review): "vaule" misspelling kept on purpose — it appears to be
            # the persisted field name; confirm against the data schema before fixing.
            "total_tradable_vaule": multiple_number(row["circulating_market_cap"], hundred_million),
            "volume": multiple_number(row["volume"], 10000),
            "turnover": multiple_number(row["money"], hundred_million),
            "turnover_rate": row["turnover_ratio"],
        }
        json_results.append(result)
    # A short page (fewer than 100 rows) means the data source is exhausted.
    if len(json_results) < 100:
        self.one_shot = True
    return json_results
def record(self, entity, start, end, size, timestamps):
    """Fetch daily exchange trade summary rows since `start` and return them
    as result dicts.
    """
    jq_exchange = code_map_jq.get(entity.code)
    df = run_query(
        table='finance.STK_EXCHANGE_TRADE_INFO',
        conditions=f'exchange_code#=#{jq_exchange}&date#>=#{to_time_str(start)}',
        parse_dates=['date'])
    print(df)
    json_results = [{
        'provider': self.provider,
        'timestamp': item['date'],
        'name': entity.name,
        'pe': item['pe_average'],
        'total_value': multiple_number(item['total_market_cap'], 100000000),
        # NOTE(review): 'vaule' misspelling preserved on purpose — it appears to
        # be the persisted field name; confirm against the data schema.
        'total_tradable_vaule': multiple_number(item['circulating_market_cap'], 100000000),
        'volume': multiple_number(item['volume'], 10000),
        'turnover': multiple_number(item['money'], 100000000),
        'turnover_rate': item['turnover_ratio']
    } for item in df.to_dict(orient='records')]
    # Fewer than one full page of rows: nothing more to fetch.
    if len(json_results) < 100:
        self.one_shot = True
    return json_results
def record(self, entity, start, end, size, timestamps):
    """Fetch and persist this fund's disclosed stock portfolio since `start`.

    Queries finance.FUND_PORTFOLIO_STOCK, normalizes the rows to the local
    schema and writes them via df_to_db. When recent (last-two-years) data is
    found, one extra query from the latest timestamp is issued to make sure
    the newest disclosures are fully captured. Always returns None.
    """
    # Skip delisted funds.
    if entity.end_date:
        return None

    # Allow one redundant pass once recent data shows up.
    redundant_times = 1
    while redundant_times > 0:
        df = run_query(
            table="finance.FUND_PORTFOLIO_STOCK",
            conditions=f"pub_date#>=#{to_time_str(start)}&code#=#{entity.code}",
            parse_dates=None,
        )
        df = df.dropna()
        # FIX: bail out when the query returns nothing. Previously an empty
        # result fell through and re-ran the identical query with an unchanged
        # `start`, looping forever.
        if not pd_is_not_null(df):
            return None
        # Sample of the raw result:
        # id code period_start period_end pub_date report_type_id report_type rank symbol name shares market_cap proportion
        # 0 8640569 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 1 601318 中国平安 19869239.0 1.361043e+09 7.09
        # 1 8640570 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 2 600519 贵州茅台 921670.0 6.728191e+08 3.50
        df["timestamp"] = pd.to_datetime(df["pub_date"])
        df.rename(columns={
            "symbol": "stock_code",
            "name": "stock_name"
        }, inplace=True)
        # proportion arrives as a percentage; store it as a fraction.
        df["proportion"] = df["proportion"] * 0.01
        df = portfolio_relate_stock(df, entity)
        df["stock_id"] = df["stock_code"].apply(
            lambda x: china_stock_code_to_id(x))
        # Row id joins entity_id, stock_id, pub_date and the upstream id so
        # re-runs overwrite instead of duplicating.
        df["id"] = df[["entity_id", "stock_id", "pub_date", "id"]].apply(
            lambda x: "_".join(x.astype(str)), axis=1)
        df["report_date"] = pd.to_datetime(df["period_end"])
        df["report_period"] = df["report_type"].apply(
            lambda x: jq_to_report_period(x))
        saved = df_to_db(df=df,
                         data_schema=self.data_schema,
                         provider=self.provider,
                         force_update=self.force_update)
        # Nothing new was persisted -> everything already stored.
        if saved == 0:
            return None
        self.logger.info(
            f"persist fund {entity.code}({entity.name}) portfolio success {df.iloc[-1]['pub_date']}"
        )
        latest = df["timestamp"].max()
        # Data reaches the last two years: query once more from the latest
        # timestamp to be sure the most recent disclosures are complete.
        if latest.year >= now_pd_timestamp().year - 1:
            redundant_times = redundant_times - 1
            start = latest
        else:
            return None
    return None