Ejemplo n.º 1
0
def _get_global_index_country_name_url() -> dict:
    """
    Collect the country selector entries of the investing.com global-indices page.
    https://cn.investing.com/indices/global-indices?majorIndices=on&primarySectors=on&bonds=on&additionalIndices=on&otherIndices=on&c_id=37

    Every ``<option>`` after the first one is inspected and only those whose
    ``value`` attribute carries a ``c_id`` query parameter are kept. The name
    and the code are always read from the same ``<option>`` element, so an
    unrelated option appearing between two country options cannot shift the
    names against the codes.

    :return: option text mapped to the ``c_id`` value found in that option
    :rtype: dict
    """
    # Local import keeps this block self-contained (stdlib only).
    from urllib.parse import parse_qs, urlsplit

    url = "https://cn.investing.com/indices/global-indices"
    params = {
        "majorIndices": "on",
        "primarySectors": "on",
        "bonds": "on",
        "additionalIndices": "on",
        "otherIndices": "on",
    }
    # Timeout matches the other investing.com requests in this file, so a
    # stalled connection cannot block forever.
    r = session.get(url, params=params, headers=short_headers, timeout=30)
    soup = BeautifulSoup(r.text, "lxml")
    name_code_map = {}
    for option in soup.find_all("option")[1:]:
        value = option.get("value", "")
        if "c_id" not in value:
            continue
        # Parse the query string properly instead of assuming that c_id is
        # the first (and only) parameter.
        codes = parse_qs(urlsplit(value).query).get("c_id")
        if not codes:
            continue
        name_code_map[option.get_text()] = codes[0]
    return name_code_map
Ejemplo n.º 2
0
def _get_global_country_name_url() -> dict:
    """
    Read the ``country`` drop-down of the investing.com rates-bonds page.
    https://cn.investing.com/rates-bonds/
    :return: text of each option mapped to that option's ``value`` attribute
    :rtype: dict
    """
    response = session.get(
        "https://cn.investing.com/rates-bonds/",
        headers=short_headers,
        timeout=30,
    )
    page = BeautifulSoup(response.text, "lxml")
    country_select = page.find("select", attrs={"name": "country"})
    # The first <option> of the select is skipped.
    options = country_select.find_all("option")[1:]
    values = [option["value"] for option in options]
    labels = [option.get_text() for option in options]
    return dict(zip(labels, values))
Ejemplo n.º 3
0
def index_investing_global_country_name_url(country: str = "中国") -> dict:
    """
    Build a name -> link mapping for the indices of one country.
    Reference page: https://cn.investing.com/indices/

    Two pages are scraped and merged into one dict:

    1. the country page (path taken from ``_get_global_country_name_url``),
       where every element with class ``plusIconTd`` contributes the text
       and ``href`` of its first ``<a>``;
    2. the global-indices page filtered by the country's ``c_id`` (taken from
       ``_get_global_index_country_name_url``), where every ``<a>`` inside
       the table with id ``cr_12`` contributes its ``title`` and ``href``.

    Entries from the second page overwrite entries of the first one when the
    names collide.

    :param country: country name exactly as it appears as a key in the dicts
        returned by ``_get_global_country_name_url`` and
        ``_get_global_index_country_name_url``
    :type country: str
    :return: index name mapped to its link
    :rtype: dict
    :raises KeyError: if ``country`` is missing from either lookup dict
    :raises AttributeError: if the second page has no table with id ``cr_12``
    """
    # NOTE(review): this changes a process-wide pandas option although no
    # DataFrame is touched here; kept because other code may rely on it.
    pd.set_option("mode.chained_assignment", None)
    name_url_dict = _get_global_country_name_url()
    name_code_dict = _get_global_index_country_name_url()
    # Resolve both lookups up front so an unknown country fails before any
    # country-specific request is sent.
    country_path = name_url_dict[country]
    country_code = name_code_dict[country]

    url = f"https://cn.investing.com{country_path}?&majorIndices=on&primarySectors=on&additionalIndices=on&otherIndices=on"
    # Timeout added for consistency with the other requests in this file.
    res = session.post(url, headers=short_headers, timeout=30)
    soup = BeautifulSoup(res.text, "lxml")
    # One pass over the cells; cells without a link are skipped instead of
    # raising on ``None``.
    anchors = [
        cell.find("a") for cell in soup.find_all(attrs={"class": "plusIconTd"})
    ]
    name_code_map_dict = {
        anchor.get_text(): anchor["href"]
        for anchor in anchors
        if anchor is not None
    }

    url = "https://cn.investing.com/indices/global-indices"
    params = {
        "majorIndices": "on",
        "primarySectors": "on",
        "bonds": "on",
        "additionalIndices": "on",
        "otherIndices": "on",
        "c_id": country_code,
    }
    r = session.get(url, params=params, headers=short_headers, timeout=30)
    soup = BeautifulSoup(r.text, "lxml")
    soup_list = soup.find("table", attrs={"id": "cr_12"}).find_all("a")
    name_code_map_dict.update(
        (item["title"], item["href"]) for item in soup_list
    )
    return name_code_map_dict
Ejemplo n.º 4
0
def bond_investing_global_country_name_url(country: str = "中国") -> dict:
    """
    Reference page: https://cn.investing.com/rates-bonds/
    Scrape the page of the selected country and collect, for every element
    with class ``plusIconTd``, the text and ``href`` of its first ``<a>``.
    :param country: str, a key of the dict returned by _get_global_country_name_url
    :return: dict mapping link text to link ``href``
    """
    country_paths = _get_global_country_name_url()
    page = session.get(
        f"https://cn.investing.com{country_paths[country]}",
        headers=short_headers,
        timeout=30,
    )
    parsed = BeautifulSoup(page.text, "lxml")
    anchors = [
        cell.find("a") for cell in parsed.find_all(attrs={"class": "plusIconTd"})
    ]
    hrefs = [anchor["href"] for anchor in anchors]
    labels = [anchor.get_text() for anchor in anchors]
    return dict(zip(labels, hrefs))
Ejemplo n.º 5
0
def bond_investing_global(
    country: str = "中国",
    index_name: str = "中国1年期国债",
    period: str = "每日",
    start_date: str = "20000101",
    end_date: str = "20191017",
) -> pd.DataFrame:
    """
    Historical data of one bond of one country between start_date and end_date.
    https://cn.investing.com/rates-bonds/

    The bond's page is looked up through
    ``bond_investing_global_country_name_url``, the two numeric ids found in
    the page's ``window.histDataExcessInfo`` script text are extracted, and
    the history is then requested from the ``HistoricalDataAjax`` endpoint.

    :param country: a key accepted by ``bond_investing_global_country_name_url``
    :type country: str
    :param index_name: a key of the dict returned by
        ``bond_investing_global_country_name_url(country)``
    :type index_name: str
    :param period: choice of {"每日", "每周", "每月"}
    :type period: str
    :param start_date: "20000101"; separators are ignored, so "2000-01-01"
        and "2000/01/01" are accepted as well
    :type start_date: str
    :param end_date: "20191017"; same formats as ``start_date``
    :type end_date: str
    :return: float columns 收盘, 开盘, 高, 低, 涨跌幅 indexed by the parsed date
    :rtype: pandas.DataFrame
    :raises KeyError: if ``period``, ``country`` or ``index_name`` is unknown
    """
    # Local import keeps this block self-contained (stdlib only).
    from io import StringIO

    def _slash_date(raw: str) -> str:
        # Keep only the digits so that both "20000101" and "2000-01-01"
        # become "2000/01/01".
        digits = re.sub(r"\D", "", raw)
        return "/".join([digits[:4], digits[4:6], digits[6:]])

    period_map = {"每日": "Daily", "每周": "Weekly", "每月": "Monthly"}
    # Validate the period before any request is sent.
    interval = period_map[period]
    start_date = _slash_date(start_date)
    end_date = _slash_date(end_date)

    name_code_dict = bond_investing_global_country_name_url(country)
    # Strip leading slashes so the joined URL has no "//" after the host.
    bond_path = name_code_dict[index_name].lstrip("/")
    temp_url = f"https://cn.investing.com/{bond_path}-historical-data"
    res = session.get(temp_url, headers=short_headers, timeout=30)
    soup = BeautifulSoup(res.text, "lxml")
    title = soup.find("h2", attrs={"class": "float_lang_base_1"}).get_text()
    data = soup.find_all(
        text=re.compile(r"window\.histDataExcessInfo"))[0].strip()
    # The first two integers in that text are sent as curr_id and smlID.
    para_data = re.findall(r"\d+", data)
    payload = {
        "curr_id": para_data[0],
        "smlID": para_data[1],
        "header": title,
        "st_date": start_date,
        "end_date": end_date,
        "interval_sec": interval,
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data",
    }
    url = "https://cn.investing.com/instruments/HistoricalDataAjax"
    res = session.post(url, data=payload, headers=long_headers, timeout=60)
    # Wrap the markup in a file-like object: passing literal HTML to
    # read_html is deprecated in recent pandas releases.
    df_data = pd.read_html(StringIO(res.text))[0]
    df_data.columns = [
        '日期',
        '收盘',
        '开盘',
        '高',
        '低',
        '涨跌幅',
    ]
    date_format = "%Y年%m月" if period == "每月" else "%Y年%m月%d日"
    df_data.index = pd.to_datetime(df_data["日期"], format=date_format)
    # Copy so the column assignment below works on an independent frame.
    df_data = df_data[["收盘", "开盘", "高", "低", "涨跌幅"]].copy()
    # astype(str) keeps this working if the column was parsed as numbers.
    df_data["涨跌幅"] = (
        df_data["涨跌幅"]
        .astype(str)
        .str.replace("%", "", regex=False)
        .str.replace(",", "", regex=False)
    )
    df_data = df_data.astype(float)
    return df_data
Ejemplo n.º 6
0
def index_kq_fz(symbol: str = "价格指数") -> pd.DataFrame:
    """
    China Keqiao textile index.
    http://www.kqindex.cn/flzs/jiage

    The first response reports the number of pages in its ``page`` field;
    the ``result`` list of every page is turned into a DataFrame and all
    pages are concatenated in page order.

    :param symbol: choice of {'价格指数', '景气指数', '外贸指数'}
    :type symbol: str
    :return: all pages of the selected index with renamed columns; an empty
        frame with the expected columns if the service returns no rows
    :rtype: pandas.DataFrame
    :raises KeyError: if ``symbol`` is not one of the supported choices
    """
    symbol_map = {
        "价格指数": "1_1",
        "景气指数": "1_2",
        "外贸指数": "2",
    }
    # NOTE: the 外贸指数 layout deliberately keeps the original labels, which
    # contain "涨跌幅" twice.
    column_map = {
        "价格指数": ["期次", "指数", "涨跌幅"],
        "景气指数": ["期次", "总景气指数", "涨跌幅", "流通景气指数", "生产景气指数"],
        "外贸指数": ["期次", "价格指数", "涨跌幅", "景气指数", "涨跌幅"],
    }
    index_type = symbol_map[symbol]
    columns = column_map[symbol]
    url = "http://www.kqindex.cn/flzs/table_data"

    def fetch_page(page: int) -> dict:
        # Request one page of the table and return the decoded JSON body.
        params = {
            "category": "0",
            "start": "",
            "end": "",
            "indexType": index_type,
            "pageindex": page,
            "_": "1619871781413",
        }
        return session.get(url, params=params, timeout=30).json()

    # Page 1 both reveals the page count and supplies the first rows, so it
    # is not requested a second time inside the loop.
    first_page = fetch_page(1)
    page_num = first_page["page"]
    frames = [pd.DataFrame(first_page["result"])]
    for page in tqdm(range(2, page_num + 1), leave=False):
        frames.append(pd.DataFrame(fetch_page(page)["result"]))
    # DataFrame.append was removed in pandas 2.0; concat once instead.
    big_df = pd.concat(frames, ignore_index=True)
    if big_df.empty:
        return pd.DataFrame(columns=columns)
    big_df.columns = columns
    return big_df