def _get_global_index_country_name_url() -> dict:
    """
    Read the country selector on the investing.com global-indices page.

    https://cn.investing.com/indices/global-indices?majorIndices=on&primarySectors=on&bonds=on&additionalIndices=on&otherIndices=on&c_id=37

    :return: mapping of country display name -> ``c_id`` code (as a string)
    :rtype: dict
    """
    url = "https://cn.investing.com/indices/global-indices"
    params = {
        "majorIndices": "on",
        "primarySectors": "on",
        "bonds": "on",
        "additionalIndices": "on",
        "otherIndices": "on",
    }
    r = session.get(url, params=params, headers=short_headers, timeout=30)
    soup = BeautifulSoup(r.text, "lxml")

    name_code_map = {}
    # The first <option> on the page is skipped, exactly as before.
    for option in soup.find_all("option")[1:]:
        value = option.get("value", "")
        if "c_id" not in value:
            continue
        # Name and code are taken from the SAME option. The previous version
        # paired the first N option texts with the N filtered codes, which
        # misaligned as soon as a non-``c_id`` option preceded a country.
        # The code is the value of the first query parameter: "...?c_id=37".
        name_code_map[option.get_text()] = value.split("?")[1].split("=")[1]
    return name_code_map
def _get_global_country_name_url() -> dict:
    """
    Read the ``country`` drop-down of the investing.com rates-bonds page.

    https://cn.investing.com/rates-bonds/

    :return: mapping of option text (country name) -> option ``value``
    :rtype: dict
    """
    response = session.get(
        "https://cn.investing.com/rates-bonds/",
        headers=short_headers,
        timeout=30,
    )
    page = BeautifulSoup(response.text, "lxml")
    country_select = page.find("select", attrs={"name": "country"})
    # The first <option> is dropped before building the mapping.
    country_options = country_select.find_all("option")[1:]
    return {opt.get_text(): opt["value"] for opt in country_options}
def index_investing_global_country_name_url(country: str = "中国") -> dict:
    """
    Collect the link names and relative URLs listed for one country.

    Reference page: https://cn.investing.com/indices/

    Two pages are scraped and merged into one mapping:

    1. the country page (path taken from ``_get_global_country_name_url``),
       requested with the majorIndices / primarySectors / additionalIndices /
       otherIndices switches on; every ``plusIconTd`` cell contributes
       ``link text -> href``;
    2. the global-indices page filtered by the country's ``c_id`` (taken from
       ``_get_global_index_country_name_url``); every link inside table
       ``cr_12`` contributes ``title -> href``. On a name clash these entries
       overwrite those from the first page.

    :param country: country name in Chinese; must be a key of both helper
        mappings, otherwise ``KeyError`` is raised
    :type country: str
    :return: mapping of link name -> relative URL
    :rtype: dict
    """
    # Kept from the original implementation: this changes a process-wide
    # pandas option, and other code may rely on it having been set.
    pd.set_option("mode.chained_assignment", None)
    name_url_dict = _get_global_country_name_url()
    name_code_dict = _get_global_index_country_name_url()

    # NOTE(review): the country path comes from the rates-bonds selector while
    # the query string carries index filters — confirm this is intended.
    url = f"https://cn.investing.com{name_url_dict[country]}?&majorIndices=on&primarySectors=on&additionalIndices=on&otherIndices=on"
    # A timeout keeps a stalled connection from blocking forever.
    res = session.post(url, headers=short_headers, timeout=30)
    soup = BeautifulSoup(res.text, "lxml")

    name_code_map_dict = {}
    # One pass over the cells instead of scanning the document twice.
    for cell in soup.find_all(attrs={"class": "plusIconTd"}):
        link = cell.find("a")
        if link is None:
            # A cell without a link has nothing to record.
            continue
        name_code_map_dict[link.get_text()] = link["href"]

    url = "https://cn.investing.com/indices/global-indices"
    params = {
        "majorIndices": "on",
        "primarySectors": "on",
        "bonds": "on",
        "additionalIndices": "on",
        "otherIndices": "on",
        "c_id": name_code_dict[country],
    }
    r = session.get(url, params=params, headers=short_headers, timeout=30)
    soup = BeautifulSoup(r.text, "lxml")
    soup_list = soup.find("table", attrs={"id": "cr_12"}).find_all("a")
    name_code_map_dict.update(
        (item["title"], item["href"]) for item in soup_list
    )
    return name_code_map_dict
def bond_investing_global_country_name_url(country: str = "中国") -> dict:
    """
    List the links shown on one country's rates-bonds page.

    Reference page: https://cn.investing.com/rates-bonds/

    The country's page path is looked up in the mapping returned by
    ``_get_global_country_name_url``; every element with class ``plusIconTd``
    on that page contributes one ``link text -> href`` entry.

    :param country: country name in Chinese, a key of the mapping returned by
        ``_get_global_country_name_url``
    :type country: str
    :return: mapping of link text -> relative URL
    :rtype: dict
    """
    country_paths = _get_global_country_name_url()
    page_url = f"https://cn.investing.com{country_paths[country]}"
    response = session.get(page_url, headers=short_headers, timeout=30)
    page = BeautifulSoup(response.text, "lxml")

    link_cells = page.find_all(attrs={"class": "plusIconTd"})
    hrefs = [cell.find("a")["href"] for cell in link_cells]
    labels = [cell.find("a").get_text() for cell in link_cells]
    return dict(zip(labels, hrefs))
def bond_investing_global(
    country: str = "中国",
    index_name: str = "中国1年期国债",
    period: str = "每日",
    start_date: str = "20000101",
    end_date: str = "20191017",
) -> pd.DataFrame:
    """
    Historical data of one bond of one country between two dates.

    https://cn.investing.com/rates-bonds/

    :param country: country name, a key accepted by
        ``bond_investing_global_country_name_url``
    :type country: str
    :param index_name: bond name, a key of the mapping returned by
        ``bond_investing_global_country_name_url(country)``
    :type index_name: str
    :param period: choice of {"每日", "每周", "每月"}
    :type period: str
    :param start_date: start date as 'YYYYMMDD', e.g. '20000101'
    :type start_date: str
    :param end_date: end date as 'YYYYMMDD', e.g. '20191017'
    :type end_date: str
    :return: float columns 收盘, 开盘, 高, 低, 涨跌幅 indexed by date
    :rtype: pandas.DataFrame
    :raises KeyError: if ``period``, ``country`` or ``index_name`` is unknown
    """
    # Local import so the block does not depend on a file-level import.
    from io import StringIO

    period_map = {"每日": "Daily", "每周": "Weekly", "每月": "Monthly"}
    # Resolve the interval first so a bad ``period`` fails before any request.
    interval = period_map[period]

    # 'YYYYMMDD' -> 'YYYY/MM/DD', the form sent in the request payload.
    start_date = "/".join([start_date[:4], start_date[4:6], start_date[6:]])
    end_date = "/".join([end_date[:4], end_date[4:6], end_date[6:]])

    name_code_dict = bond_investing_global_country_name_url(country)
    temp_url = f"https://cn.investing.com/{name_code_dict[index_name]}-historical-data"
    res = session.get(temp_url, headers=short_headers, timeout=30)
    soup = BeautifulSoup(res.text, "lxml")
    title = soup.find("h2", attrs={"class": "float_lang_base_1"}).get_text()
    # The first two integers in the text node mentioning
    # ``window.histDataExcessInfo`` are sent as curr_id and smlID.
    data = soup.find_all(
        text=re.compile("window.histDataExcessInfo"))[0].strip()
    para_data = re.findall(r"\d+", data)
    payload = {
        "curr_id": para_data[0],
        "smlID": para_data[1],
        "header": title,
        "st_date": start_date,
        "end_date": end_date,
        "interval_sec": interval,
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data",
    }
    url = "https://cn.investing.com/instruments/HistoricalDataAjax"
    res = session.post(url, data=payload, headers=long_headers, timeout=60)

    # Passing literal HTML to read_html is deprecated in recent pandas;
    # a file-like wrapper works on old and new versions alike.
    df_data = pd.read_html(StringIO(res.text))[0]
    df_data.columns = [
        "日期",
        "收盘",
        "开盘",
        "高",
        "低",
        "涨跌幅",
    ]
    # Monthly rows carry no day component in the date column.
    date_format = "%Y年%m月" if period == "每月" else "%Y年%m月%d日"
    df_data.index = pd.to_datetime(df_data["日期"], format=date_format)

    # Copy the slice so the assignment below targets an independent frame
    # instead of relying on chained-assignment warnings being disabled.
    df_data = df_data[["收盘", "开盘", "高", "低", "涨跌幅"]].copy()
    df_data["涨跌幅"] = df_data["涨跌幅"].str.replace("%", "")
    df_data["涨跌幅"] = df_data["涨跌幅"].str.replace(",", "")
    df_data = df_data.astype(float)
    return df_data
def index_kq_fz(symbol: str = "价格指数") -> pd.DataFrame:
    """
    China Keqiao textile index.

    http://www.kqindex.cn/flzs/jiage

    :param symbol: choice of {'价格指数', '景气指数', '外贸指数'}
    :type symbol: str
    :return: all pages of the selected index in one frame
    :rtype: pandas.DataFrame
    :raises KeyError: if ``symbol`` is not one of the supported choices
    """
    symbol_map = {
        "价格指数": "1_1",
        "景气指数": "1_2",
        "外贸指数": "2",
    }
    # Output column names per index type. The '外贸指数' layout repeats
    # "涨跌幅" on purpose to keep the returned frame identical to before.
    column_map = {
        "价格指数": ["期次", "指数", "涨跌幅"],
        "景气指数": ["期次", "总景气指数", "涨跌幅", "流通景气指数", "生产景气指数"],
        "外贸指数": ["期次", "价格指数", "涨跌幅", "景气指数", "涨跌幅"],
    }
    url = "http://www.kqindex.cn/flzs/table_data"
    params = {
        "category": "0",
        "start": "",
        "end": "",
        "indexType": symbol_map[symbol],
        "pageindex": "1",
        "_": "1619871781413",
    }
    # The first request is only used to learn how many pages exist.
    r = session.get(url, params=params, timeout=30)
    page_num = r.json()["page"]

    frames = []
    for page in tqdm(range(1, page_num + 1), leave=False):
        params["pageindex"] = page
        r = session.get(url, params=params, timeout=30)
        frames.append(pd.DataFrame(r.json()["result"]))

    # DataFrame.append was removed in pandas 2.0 and re-copied the whole
    # frame on every iteration; concatenate once instead.
    big_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    big_df.columns = column_map[symbol]
    return big_df