def record(self, entity, start, end, size, timestamps):
    """Fetch the member stocks of one block, page by page, and persist them.

    Requests pages 1 to 4 of ``self.category_stocks_url`` for ``entity.code``.
    Each item of a decoded page becomes one block/stock relation row, and the
    rows of a page are saved through ``df_to_db`` with ``force_update=True``.
    Paging stops as soon as a page body is ``"null"`` (or ``None``).

    Errors raised while decoding or saving a page are logged and do not stop
    the remaining pages; ``self.sleep()`` is called after every processed page.

    :param entity: block entity; ``id``, ``code``, ``name``, ``exchange`` and
        ``category`` are read from it.
    :param start: not used by this implementation.
    :param end: not used by this implementation.
    :param size: not used by this implementation.
    :param timestamps: not used by this implementation.
    :return: None
    """
    block_id = entity.id
    for page in range(1, 5):
        resp = requests.get(self.category_stocks_url.format(page, entity.code))
        try:
            if resp.text is None or resp.text == "null":
                break

            category_jsons = demjson3.decode(resp.text)
            the_list = []
            for category in category_jsons:
                stock_code = category["code"]
                stock_id = china_stock_code_to_id(stock_code)
                the_list.append(
                    {
                        "id": "{}_{}".format(block_id, stock_id),
                        "entity_id": block_id,
                        "entity_type": "block",
                        "exchange": entity.exchange,
                        "code": entity.code,
                        "name": entity.name,
                        "timestamp": now_pd_timestamp(),
                        "stock_id": stock_id,
                        "stock_code": stock_code,
                        "stock_name": category["name"],
                    }
                )

            if the_list:
                df = pd.DataFrame.from_records(the_list)
                df_to_db(data_schema=self.data_schema, df=df, provider=self.provider, force_update=True)

            self.logger.info("finish recording BlockStock:{},{}".format(entity.category, entity.name))
        except Exception as e:
            # The message needs one placeholder per argument; without them the
            # logging module fails to format the record and the error is lost.
            self.logger.error("error:%s,resp.text:%s", e, resp.text)
        self.sleep()
def record(self, entity, start, end, size, timestamps):
    """Fetch the member stocks of one block in a single request and persist them.

    Requests ``self.category_stocks_url`` formatted with ``entity.code`` and
    ``"1"``, unwraps the body with ``json_callback_param`` and splits every
    result on commas: field 1 is used as the stock code and field 2 as the
    stock name.  The resulting block/stock relation rows are saved through
    ``df_to_db`` with ``force_update=True``.

    Errors raised while parsing or saving are logged rather than propagated;
    ``self.sleep()`` is always called before returning.

    :param entity: block entity; ``id``, ``code``, ``name``, ``exchange`` and
        ``category`` are read from it.
    :param start: not used by this implementation.
    :param end: not used by this implementation.
    :param size: not used by this implementation.
    :param timestamps: not used by this implementation.
    :return: None
    """
    resp = requests.get(self.category_stocks_url.format(entity.code, "1"), headers=DEFAULT_HEADER)
    try:
        results = json_callback_param(resp.text)
        block_id = entity.id
        the_list = []
        for result in results:
            items = result.split(",")
            stock_code = items[1]
            stock_id = china_stock_code_to_id(stock_code)
            the_list.append(
                {
                    "id": "{}_{}".format(block_id, stock_id),
                    "entity_id": block_id,
                    "entity_type": "block",
                    "exchange": entity.exchange,
                    "code": entity.code,
                    "name": entity.name,
                    "timestamp": now_pd_timestamp(),
                    "stock_id": stock_id,
                    "stock_code": stock_code,
                    "stock_name": items[2],
                }
            )

        if the_list:
            df = pd.DataFrame.from_records(the_list)
            df_to_db(data_schema=self.data_schema, df=df, provider=self.provider, force_update=True)

        self.logger.info("finish recording block:{},{}".format(entity.category, entity.name))
    except Exception as e:
        # The message needs one placeholder per argument; without them the
        # logging module fails to format the record and the error is lost.
        self.logger.error("error:%s,resp.text:%s", e, resp.text)
    self.sleep()
def download_sz_etf_component(self, df: pd.DataFrame):
    """Download and store the constituent stocks of the SZ ETFs listed in ``df``.

    ``self.parse_sz_etf_underlying_index(df)`` is called first (presumably it
    fills the "拟合指数" column -- confirm against that helper).  For every row
    with a non-empty underlying index, the index's component page is fetched
    and parsed with ``pandas.read_html``; the fourth table on the page is
    taken as the component list.  Codes are zero-padded to six digits and the
    rows are saved through ``df_to_db``.

    Rows are skipped (without sleeping) when the underlying index is empty,
    when the page cannot be parsed, or when it has fewer than four tables.

    :param df: ETF list; the columns "拟合指数", "证券代码" and "证券简称" are read.
    :return: None
    """
    component_url = "http://vip.stock.finance.sina.com.cn/corp/go.php/vII_NewestComponent/indexid/{}.phtml"

    def zero_pad(raw_code):
        # Left-pad the numeric code with zeros to six digits.
        return f"{raw_code:06d}"

    self.parse_sz_etf_underlying_index(df)

    for _, etf in df.iterrows():
        index_code = etf["拟合指数"]
        etf_code = etf["证券代码"]

        if len(index_code) == 0:
            self.logger.info(f'{etf["证券简称"]} - {etf_code} 非 A 股市场指数,跳过...')
            continue

        response = requests.get(component_url.format(index_code))
        # Decode the body as GBK before reading response.text.
        response.encoding = "gbk"

        try:
            tables = pd.read_html(response.text, header=1)
        except ValueError as error:
            self.logger.error(f"HTML parse error: {error}, response: {response.text}")
            continue

        if len(tables) < 4:
            continue

        # Drop every column that contains a missing value.
        raw_components = tables[3].copy().dropna(axis=1, how="any")
        raw_components["品种代码"] = raw_components["品种代码"].apply(zero_pad)

        etf_id = f"etf_sz_{etf_code}"
        components = raw_components[["品种代码", "品种名称"]].copy()
        components = components.rename(columns={"品种代码": "stock_code", "品种名称": "stock_name"})

        components["entity_id"] = etf_id
        components["entity_type"] = "etf"
        components["exchange"] = "sz"
        components["code"] = etf_code
        components["name"] = etf["证券简称"]
        components["timestamp"] = now_pd_timestamp()

        def build_row_id(stock_id, prefix=etf_id):
            # Row id is "<etf entity id>_<stock id>".
            return f"{prefix}_{stock_id}"

        components["stock_id"] = components["stock_code"].apply(china_stock_code_to_id)
        components["id"] = components["stock_id"].apply(build_row_id)

        df_to_db(data_schema=self.data_schema, df=components, provider=self.provider)
        self.logger.info(f'{etf["证券简称"]} - {etf_code} 成分股抓取完成...')

        self.sleep()
def download_sh_etf_component(self, df: pd.DataFrame):
    """Download and store the constituent stocks of the SH ETFs listed in ``df``.

    ETF_CLASS values:
        1. single-market ETF
        2. cross-market ETF
        3. cross-border ETF
        5. bond ETF
        6. gold ETF

    Only classes "1" and "2" are processed.  The selected rows are passed
    through ``self.populate_sh_etf_type`` (which is expected to provide the
    ``ETF_TYPE`` column read below -- confirm against that helper), then the
    constituent list of each ETF is requested, decoded and saved through
    ``df_to_db``.  An ETF whose response carries no constituent rows is
    skipped with a warning instead of aborting the whole run.

    :param df: ETF list data; ``ETF_CLASS``, ``FUND_ID`` and ``FUND_NAME``
        are read.
    :return: None
    """
    query_url = (
        "http://query.sse.com.cn/infodisplay/queryConstituentStockInfo.do?"
        "isPagination=false&type={}&etfClass={}"
    )

    etf_df = df[(df["ETF_CLASS"] == "1") | (df["ETF_CLASS"] == "2")]
    etf_df = self.populate_sh_etf_type(etf_df)

    for _, etf in etf_df.iterrows():
        url = query_url.format(etf["ETF_TYPE"], etf["ETF_CLASS"])
        response = requests.get(url, headers=DEFAULT_SH_ETF_LIST_HEADER)
        response_dict = demjson3.decode(response.text)
        response_df = pd.DataFrame(response_dict.get("result", []))

        etf_code = etf["FUND_ID"]

        if response_df.empty:
            # An empty/missing "result" yields a frame without columns, so the
            # column selection below would raise KeyError and stop every
            # remaining ETF from being processed.
            self.logger.warning(f'{etf["FUND_NAME"]} - {etf_code} returned no constituents, skipping...')
            self.sleep()
            continue

        etf_id = f"etf_sh_{etf_code}"
        response_df = response_df[["instrumentId", "instrumentName"]].copy()
        response_df.rename(columns={"instrumentId": "stock_code", "instrumentName": "stock_name"}, inplace=True)

        response_df["entity_id"] = etf_id
        response_df["entity_type"] = "etf"
        response_df["exchange"] = "sh"
        response_df["code"] = etf_code
        response_df["name"] = etf["FUND_NAME"]
        response_df["timestamp"] = now_pd_timestamp()

        response_df["stock_id"] = response_df["stock_code"].apply(lambda code: china_stock_code_to_id(code))
        # Row id is "<etf entity id>_<stock id>".
        response_df["id"] = response_df["stock_id"].apply(lambda x: f"{etf_id}_{x}")

        df_to_db(data_schema=self.data_schema, df=response_df, provider=self.provider)
        self.logger.info(f'{etf["FUND_NAME"]} - {etf_code} 成分股抓取完成...')

        self.sleep()
def get_cn_index_stock(code, timestamp, name=None):
    """Fetch the constituent stocks of an index for the month of ``timestamp``.

    The request URL is built from the module-level ``url`` template (defined
    outside this block) with ``code`` and the timestamp formatted via
    ``TIME_FORMAT_MON``.  Every entry of the ``rows`` list in the response
    payload becomes one record whose entity id is ``index_sz_<code>``.

    :param code: index code.
    :param timestamp: point in time used to build the query period.
    :param name: optional index name copied into every record.
    :return: a DataFrame with one row per constituent, or None when the
        response has no data or no rows.
    """
    entity_type = "index"
    exchange = "sz"
    entity_id = f"{entity_type}_{exchange}_{code}"

    data_str = to_time_str(timestamp, TIME_FORMAT_MON)
    resp = requests.get(url.format(code, data_str), headers=DEFAULT_HEADER)
    data = _get_resp_data(resp)
    if not data:
        return None

    # Reuse the payload extracted above instead of extracting it a second time.
    results = data["rows"]

    the_list = []
    for result in results:
        # Sample entry:
        # date: 1614268800000
        # dateStr: "2021-02-26"
        # freeMarketValue: 10610.8
        # indexcode: "399370"
        # market: null
        # seccode: "600519"
        # secname: "贵州茅台"
        # totalMarketValue: 26666.32
        # trade: "主要消费"
        # weight: 10.01
        stock_code = result["seccode"]
        stock_name = result["secname"]
        stock_id = china_stock_code_to_id(stock_code)

        the_list.append(
            {
                "id": "{}_{}_{}".format(entity_id, result["dateStr"], stock_id),
                "entity_id": entity_id,
                "entity_type": entity_type,
                "exchange": exchange,
                "code": code,
                "name": name,
                "timestamp": to_pd_timestamp(result["dateStr"]),
                "stock_id": stock_id,
                "stock_code": stock_code,
                "stock_name": stock_name,
                "proportion": value_to_pct(result["weight"], 0),
                # NOTE(review): presumably converts units of 1e8 to absolute value -- confirm.
                "market_cap": value_multiply(result["freeMarketValue"], 100000000, 0),
            }
        )

    if not the_list:
        return None
    return pd.DataFrame.from_records(the_list)
def get_cn_index_stock(code, timestamp, name=None):
    """Return the constituents of an index for the month of ``timestamp``.

    Builds the request from the module-level ``url`` template (defined outside
    this block), ``code`` and the timestamp rendered with ``TIME_FORMAT_MON``,
    then converts each item of the payload's ``rows`` list into a record with
    entity id ``index_sz_<code>``.

    :param code: index code.
    :param timestamp: point in time used to build the query period.
    :param name: optional index name stored on every record.
    :return: DataFrame of constituent records, or None if the response holds
        no data or no rows.
    """
    entity_type = 'index'
    exchange = 'sz'
    entity_id = f'{entity_type}_{exchange}_{code}'

    data_str = to_time_str(timestamp, TIME_FORMAT_MON)
    resp = requests.get(url.format(code, data_str), headers=DEFAULT_HEADER)
    data = _get_resp_data(resp)
    if not data:
        return None

    the_list = []
    # ``data`` already holds the extracted payload; no need to extract it again.
    for result in data['rows']:
        # Sample entry:
        # date: 1614268800000
        # dateStr: "2021-02-26"
        # freeMarketValue: 10610.8
        # indexcode: "399370"
        # market: null
        # seccode: "600519"
        # secname: "贵州茅台"
        # totalMarketValue: 26666.32
        # trade: "主要消费"
        # weight: 10.01
        stock_code = result['seccode']
        stock_name = result['secname']
        stock_id = china_stock_code_to_id(stock_code)

        the_list.append({
            'id': '{}_{}_{}'.format(entity_id, result['dateStr'], stock_id),
            'entity_id': entity_id,
            'entity_type': entity_type,
            'exchange': exchange,
            'code': code,
            'name': name,
            'timestamp': to_pd_timestamp(result['dateStr']),
            'stock_id': stock_id,
            'stock_code': stock_code,
            'stock_name': stock_name,
            'proportion': value_to_pct(result['weight'], 0),
            # NOTE(review): presumably converts units of 1e8 to absolute value -- confirm.
            'market_cap': value_multiply(result['freeMarketValue'], 100000000, 0),
        })

    if the_list:
        return pd.DataFrame.from_records(the_list)
    return None
def record(self, entity, start, end, size, timestamps):
    """Query the stock portfolio of one ETF and persist it.

    Runs a ``finance.FUND_PORTFOLIO_STOCK`` query for ``entity.code`` with
    ``pub_date`` on or after ``start``.  When the result is non-empty the
    frame is normalised (timestamp, renamed stock columns, proportion scaled
    by 0.01, ids, report date/period) and saved through ``df_to_db``.

    :param entity: ETF entity; ``code`` is read here and the entity is passed
        to ``portfolio_relate_stock``.
    :param start: lower bound for ``pub_date``.
    :param end: not used by this implementation.
    :param size: not used by this implementation.
    :param timestamps: not used by this implementation.
    :return: None
    """

    def join_id_parts(row):
        # Concatenate the string form of every field in the row with "_".
        return "_".join(row.astype(str))

    df = run_query(
        table="finance.FUND_PORTFOLIO_STOCK",
        conditions=f"pub_date#>=#{to_time_str(start)}&code#=#{entity.code}",
        parse_dates=None,
    )

    if not pd_is_not_null(df):
        return None

    # Sample of the queried data:
    #          id    code period_start  period_end    pub_date  report_type_id report_type  rank  symbol  name      shares    market_cap  proportion
    # 0   8640569  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     1  601318  中国平安  19869239.0  1.361043e+09        7.09
    # 1   8640570  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     2  600519  贵州茅台    921670.0  6.728191e+08        3.50
    # 2   8640571  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     3  600036  招商银行  18918815.0  5.806184e+08        3.02
    # 3   8640572  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     4  601166  兴业银行  22862332.0  3.646542e+08        1.90
    df["timestamp"] = pd.to_datetime(df["pub_date"])
    df.rename(columns={"symbol": "stock_code", "name": "stock_name"}, inplace=True)
    df["proportion"] = df["proportion"] * 0.01

    df = portfolio_relate_stock(df, entity)

    df["stock_id"] = df["stock_code"].apply(china_stock_code_to_id)
    id_fields = df[["entity_id", "stock_id", "pub_date", "id"]]
    df["id"] = id_fields.apply(join_id_parts, axis=1)
    df["report_date"] = pd.to_datetime(df["period_end"])
    df["report_period"] = df["report_type"].apply(jq_to_report_period)

    df_to_db(
        df=df,
        data_schema=self.data_schema,
        provider=self.provider,
        force_update=self.force_update,
    )

    self.logger.info(f"persist etf {entity.code} portfolio success {df.iloc[-1]['pub_date']}")
    return None
def record(self, entity, start, end, size, timestamps):
    """Record the member stocks of one block, one page at a time.

    Pages 1 to 4 of ``self.category_stocks_url`` are requested for
    ``entity.code``.  A page body of ``'null'`` (or ``None``) ends the paging.
    Every decoded item becomes a block/stock relation row and each page's rows
    are saved through ``df_to_db`` with ``force_update=True``.

    Exceptions raised while decoding or saving a page are logged and the next
    page is still attempted; ``self.sleep()`` runs after every processed page.

    :param entity: block entity; ``id``, ``code``, ``name``, ``exchange`` and
        ``category`` are read from it.
    :param start: not used by this implementation.
    :param end: not used by this implementation.
    :param size: not used by this implementation.
    :param timestamps: not used by this implementation.
    :return: None
    """
    block_id = entity.id
    for page in range(1, 5):
        resp = requests.get(self.category_stocks_url.format(page, entity.code))
        try:
            if resp.text is None or resp.text == 'null':
                break

            category_jsons = demjson.decode(resp.text)
            the_list = []
            for category in category_jsons:
                stock_code = category['code']
                stock_id = china_stock_code_to_id(stock_code)
                the_list.append({
                    'id': '{}_{}'.format(block_id, stock_id),
                    'entity_id': block_id,
                    'entity_type': 'block',
                    'exchange': entity.exchange,
                    'code': entity.code,
                    'name': entity.name,
                    'timestamp': now_pd_timestamp(),
                    'stock_id': stock_id,
                    'stock_code': stock_code,
                    'stock_name': category['name'],
                })

            if the_list:
                df = pd.DataFrame.from_records(the_list)
                df_to_db(data_schema=self.data_schema, df=df, provider=self.provider, force_update=True)

            self.logger.info('finish recording BlockStock:{},{}'.format(entity.category, entity.name))
        except Exception as e:
            # The message needs one placeholder per argument; without them the
            # logging module fails to format the record and the error is lost.
            self.logger.error("error:%s,resp.text:%s", e, resp.text)
        self.sleep()
def record(self, entity, start, end, size, timestamps):
    """Query the stock portfolio of one fund and persist it, looping forward in time.

    Entities with an ``end_date`` are ignored.  Otherwise
    ``finance.FUND_PORTFOLIO_STOCK`` is queried for ``entity.code`` with
    ``pub_date`` on or after ``start``; rows containing missing values are
    dropped, the frame is normalised and saved through ``df_to_db``.  The
    query is then repeated from the latest saved timestamp until either no
    rows come back, nothing new is saved, or the latest timestamp falls
    within the current or previous calendar year.

    :param entity: fund entity; ``end_date``, ``code`` and ``name`` are read
        and the entity is passed to ``portfolio_relate_stock``.
    :param start: lower bound for ``pub_date`` of the first query.
    :param end: not used by this implementation.
    :param size: not used by this implementation.
    :param timestamps: not used by this implementation.
    :return: None
    """

    def join_id_parts(row):
        # Concatenate the string form of every field in the row with "_".
        return "_".join(row.astype(str))

    # Ignore delisted entities.
    if entity.end_date:
        return None

    remaining_passes = 1
    while remaining_passes > 0:
        df = run_query(
            table="finance.FUND_PORTFOLIO_STOCK",
            conditions=f"pub_date#>=#{to_time_str(start)}&code#=#{entity.code}",
            parse_dates=None,
        )
        df = df.dropna()

        if not pd_is_not_null(df):
            return None

        # Data format:
        #          id    code period_start  period_end    pub_date  report_type_id report_type  rank  symbol  name      shares    market_cap  proportion
        # 0   8640569  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     1  601318  中国平安  19869239.0  1.361043e+09        7.09
        # 1   8640570  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     2  600519  贵州茅台    921670.0  6.728191e+08        3.50
        # 2   8640571  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     3  600036  招商银行  18918815.0  5.806184e+08        3.02
        # 3   8640572  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     4  601166  兴业银行  22862332.0  3.646542e+08        1.90
        df["timestamp"] = pd.to_datetime(df["pub_date"])
        df.rename(columns={"symbol": "stock_code", "name": "stock_name"}, inplace=True)
        df["proportion"] = df["proportion"] * 0.01

        df = portfolio_relate_stock(df, entity)

        df["stock_id"] = df["stock_code"].apply(china_stock_code_to_id)
        id_fields = df[["entity_id", "stock_id", "pub_date", "id"]]
        df["id"] = id_fields.apply(join_id_parts, axis=1)
        df["report_date"] = pd.to_datetime(df["period_end"])
        df["report_period"] = df["report_type"].apply(jq_to_report_period)

        saved = df_to_db(
            df=df,
            data_schema=self.data_schema,
            provider=self.provider,
            force_update=self.force_update,
        )

        # No non-duplicate data could be fetched.
        if saved == 0:
            return None

        self.logger.info(
            f"persist fund {entity.code}({entity.name}) portfolio success {df.iloc[-1]['pub_date']}"
        )
        latest = df["timestamp"].max()

        # Data from the last two years has been reached: use up the extra pass.
        if latest.year >= now_pd_timestamp().year - 1:
            remaining_passes -= 1
        # NOTE(review): the next query always resumes from the latest saved
        # timestamp; confirm this assignment is not meant to be conditional.
        start = latest

    return None