예제 #1
0
def QA_ts_update_namechange():
    """
    Save the full history of name changes for all stocks.

    Builds a unique index on (code, start_date_stamp, end_date_stamp,
    ann_date_stamp) in the `namechange` collection and inserts only
    records that are not yet present (incremental update).
    """
    # Create the collection and its unique compound index
    coll = DATABASE.namechange
    coll.create_index(
        [
            ("code", ASCENDING),
            ("start_date_stamp", ASCENDING),
            ("end_date_stamp", ASCENDING),
            ("ann_date_stamp", ASCENDING),
        ],
        unique=True,
    )
    # Initialize the tushare pro data API
    pro = get_pro()
    # All historical stock symbols, formatted in tushare style
    symbol_list = sorted(
        list(set(QA_fmt_code_list(QA_fetch_stock_basic().index.tolist(),
                                  "ts"))))
    frames = []
    for i, symbol in enumerate(symbol_list):
        if i % 100 == 0:
            print(f"Saving {i}th stock name, stock is {symbol}")
        try:
            frames.append(pro.namechange(ts_code=symbol))
        except Exception:
            # Likely an API rate limit; wait briefly and retry once
            time.sleep(1)
            try:
                frames.append(pro.namechange(ts_code=symbol))
            except Exception:
                raise ValueError("[ERROR]\t数据获取失败")
    # pd.concat replaces DataFrame.append (removed in pandas 2.0)
    df = pd.concat(frames) if frames else pd.DataFrame()
    df["code"] = QA_fmt_code_list(df["ts_code"])
    df["start_date_stamp"] = df["start_date"].apply(QA_util_date_stamp)
    df["end_date_stamp"] = df["end_date"].apply(QA_util_date_stamp)
    df["ann_date_stamp"] = df["ann_date"].apply(QA_util_date_stamp)
    # Replace NaN with None so values are BSON-serializable
    df = df.where(pd.notnull(df), None)
    js = QA_util_to_json_from_pandas(df.drop(columns=["ts_code"]))
    for item in js:
        if not item["end_date"]:
            # Name still in use: sentinel stamp far in the future
            item["end_date_stamp"] = 9999999999
        qry = {
            "code": item["code"],
            "start_date_stamp": item["start_date_stamp"],
            "end_date_stamp": item["end_date_stamp"],
            "ann_date_stamp": item["ann_date_stamp"],
        }
        if coll.count_documents(qry) == 0:  # insert only if new
            coll.insert_one(item)
예제 #2
0
def QA_ts_update_stock_basic():
    """
    Localize basic information for all stocks.

    Fetches listed (L), paused (P) and delisted (D) stocks from tushare
    and incrementally inserts new records into the `stock_basic`
    collection, keyed by (code, status, list_date_stamp).
    """
    coll = DATABASE.stock_basic
    coll.create_index(
        [("code", ASCENDING), ("status", ASCENDING),
         ("list_date_stamp", ASCENDING)],
        unique=True,
    )

    # Initialize the tushare pro data API
    pro = get_pro()
    # Fetch every listing status separately and tag each frame
    df_1 = pro.stock_basic(exchange="", list_status="L")
    df_2 = pro.stock_basic(exchange="", list_status="P")
    df_3 = pro.stock_basic(exchange="", list_status="D")
    df_1["status"] = "L"
    df_2["status"] = "P"
    df_3["status"] = "D"
    # pd.concat replaces the DataFrame.append chain (removed in pandas 2.0)
    df = pd.concat([df_1, df_2, df_3])
    df["code"] = QA_fmt_code_list(df.ts_code)
    df["list_date_stamp"] = df.list_date.apply(QA_util_date_stamp)
    # Replace NaN with None so values are BSON-serializable
    df = df.where(pd.notnull(df), None)
    js = QA_util_to_json_from_pandas(df.drop(columns=["ts_code", "symbol"]))
    for item in js:
        qry = {
            "code": item["code"],
            "status": item["status"],
            "list_date_stamp": item["list_date_stamp"],
        }
        if coll.count_documents(qry) == 0:  # incremental update
            coll.insert_one(item)
예제 #3
0
 def _get_crosssection_financial(report_date, report_type, sheet_type,
                                 fields, wait_seconds, trial_count):
     nonlocal pro, max_trial
     if trial_count >= max_trial:
         raise ValueError("[ERROR]\tEXCEED MAX TRIAL!")
     try:
         if not fields:
             print(
                 f"pro.{sheet_type}_vip(period='{report_date}', report_type={report_type})"
             )
             df = eval(
                 f"pro.{sheet_type}_vip(period='{report_date}', report_type={report_type})"
             )
         else:
             df = eval(
                 f"pro.{sheet_type}_vip(period='{report_date}', report_type={report_type}, fields={fields})"
             )
         if df.empty:
             return df
         df.ts_code = QA_fmt_code_list(df.ts_code)
         return df.rename(columns={
             "ts_code": "code",
             "end_date": "report_date"
         }).sort_values(by=['ann_date', 'f_ann_date'])
     except Exception as e:
         print(e)
         time.sleep(wait_seconds)
         _get_crosssection_financial(report_date, report_type, sheet_type,
                                     fields, wait_seconds, trial_count + 1)
예제 #4
0
def QA_fetch_get_factor_start_date(factor: pd.Series) -> pd.DataFrame:
    """
    Fetch listing dates for the factor's stock pool.  Note: requires a
    logged-in local jqdatasdk session.

    Parameters
    ---
    :param factor: factor values, indexed by ['datetime', 'code']

    Returns
    ---
    :return: factor data with an extra 'start_date' column
    """
    # Normalize the factor and wrap it as a single-column frame
    normalized = QA_fmt_factor(factor.copy())
    result = pd.DataFrame(normalized.rename("factor"))
    # Convert stock codes to jqdatasdk style
    codes = QA_fmt_code_list(
        normalized.index.get_level_values("code").drop_duplicates(),
        style="jq")
    # Listing dates from the jqdatasdk securities table
    all_securities = jqdatasdk.get_all_securities(types="stock")
    known = all_securities.index.intersection(codes)
    start_dates = all_securities.loc[known]["start_date"]
    start_dates.index = start_dates.index.map(lambda c: c[:6])
    # Keep only assets with a known listing date, then attach it per row
    result = result.loc[(slice(None), list(start_dates.index)), :]
    result["start_date"] = result.index.map(
        lambda idx: start_dates.loc[idx[1]]).tolist()
    return result
예제 #5
0
def QA_fetch_factor_start_date(factor: pd.Series) -> pd.DataFrame:
    """
    Fetch listing dates for the factor's stock pool from the local
    database.  Save the stock basic info locally before using this.
    """
    # Normalize the factor and wrap it as a single-column frame
    normalized = QA_fmt_factor(factor.copy())
    result = pd.DataFrame(normalized.rename("factor"))
    # Standardize the stock codes
    codes = QA_fmt_code_list(
        normalized.index.get_level_values("code").drop_duplicates())
    # Listing dates from the locally saved stock basics
    basics = QA_fetch_stock_basic(status=None).set_index("code")
    known = basics.index.intersection(codes)
    list_dates = basics.loc[known]["list_date"]
    # Keep only assets with a known listing date, then attach it per row
    result = result.loc[(slice(None), list(list_dates.index)), :]
    result["start_date"] = result.index.map(
        lambda idx: list_dates.loc[idx[1]]).tolist()
    return result
예제 #6
0
def QA_ts_update_industry(
    level: Union[str, List, Tuple] = ["L1", "L2", "L3"],
    src: Union[str, List, Tuple] = "SW2021",
):
    """
    Save per-stock industry classification info.

    Pulls industry indexes (per level/source) and their constituents from
    tushare, then upserts one document per (code, level, src,
    in_date_stamp) into the `industry` collection.
    """
    pro = get_pro()
    if isinstance(level, str):
        level = [level]
    if isinstance(src, str):
        src = [src]
    industry_frames = []
    for s in src:
        for lv in level:
            try:
                df_tmp = pro.index_classify(level=lv, src=s)
                # NOTE(review): "src" is hard-coded to "sw" regardless of
                # the `src` argument -- confirm this is intended
                df_tmp["src"] = "sw"
                industry_frames.append(df_tmp)
            except Exception as e1:
                print(e1)
                time.sleep(1)
                try:
                    df_tmp = pro.index_classify(level=lv, src=s)
                    df_tmp["src"] = "sw"
                    industry_frames.append(df_tmp)
                except Exception as e2:
                    raise ValueError(e2)
    # pd.concat replaces DataFrame.append (removed in pandas 2.0);
    # without ignore_index it keeps the same per-frame indices append did
    df_industry = pd.concat(industry_frames) if industry_frames \
        else pd.DataFrame()
    result_frames = []
    for idx, item in df_industry.iterrows():
        time.sleep(0.3)  # throttle to respect API rate limits
        if idx % 100 == 0:
            print(f"currently saving {idx}th record")
        try:
            df_tmp = pro.index_member(index_code=item["index_code"])
            df_tmp["industry_name"] = item["industry_name"]
            df_tmp["level"] = item["level"].lower()
            df_tmp["src"] = item["src"].lower()
            result_frames.append(df_tmp)
        except Exception as e1:
            print(e1)
            time.sleep(1)
            try:
                df_tmp = pro.index_member(index_code=item["index_code"])
                df_tmp["industry_name"] = item["industry_name"]
                df_tmp["level"] = item["level"].lower()
                df_tmp["src"] = item["src"].lower()
                result_frames.append(df_tmp)
            except Exception as e2:
                raise ValueError(e2)
    df_results = pd.concat(result_frames) if result_frames \
        else pd.DataFrame()
    df_results.con_code = QA_fmt_code_list(df_results.con_code)
    df_results = df_results.rename(columns={"con_code": "code"})
    df_results = df_results.sort_values(by="code")
    coll = DATABASE.industry
    coll.create_index(
        [
            ("code", ASCENDING),
            ("level", ASCENDING),
            ("src", ASCENDING),
            ("in_date_stamp", DESCENDING),
            ("out_date_stamp", DESCENDING),
        ],
        unique=False,
    )
    for item in QA_util_to_json_from_pandas(df_results):
        item["in_date_stamp"] = QA_util_date_stamp(item["in_date"])
        if not item["out_date"]:
            # Still a constituent: sentinel stamp far in the future
            item["out_date_stamp"] = 9999999999
        else:
            # BUG FIX: was QA_util_date_stamp(item["out_date_stamp"]),
            # which read the not-yet-set stamp key instead of "out_date"
            item["out_date_stamp"] = QA_util_date_stamp(item["out_date"])
        coll.update_one(
            {
                "code": item["code"],
                "level": item["level"],
                "src": item["src"],
                "in_date_stamp": item["in_date_stamp"],
            },
            {"$set": item},
            upsert=True,
        )
    print('finished saving industry')
예제 #7
0
def QA_fetch_get_factor_groupby(factor: pd.Series,
                                industry_cls: str = "sw_l1",
                                detailed: bool = False) -> pd.DataFrame:
    """
    Fetch the industry exposure of a factor.  Note: the return value is a
    pd.DataFrame containing the original factor values plus one extra
    column with each asset's industry (requires importing and logging in
    to the local jqdatasdk first).

    Parameters
    ---
    :param factor: factor values, indexed by ['datetime', 'code']
    :param industry_cls: industry classification, defaults to Shenwan level 1
    :param detailed: detailed mode; False (default) uses only the industry
        info of the factor's last date

    Returns
    ---
    :return: factor data, including factor values and their industry group
    """
    warnings.warn("请先自行导入聚宽本地 sdk 并登陆", UserWarning)
    # Normalize the factor representation
    factor = QA_fmt_factor(factor)
    merged_data = pd.DataFrame(factor.copy().rename("factor"))
    # Convert stock codes to jqdatasdk style
    stock_list = QA_fmt_code_list(
        factor.index.get_level_values("code").drop_duplicates(), style="jq")
    # Non-detailed mode: industry data taken at the last factor date only
    if detailed:
        # start_time = str(min(factor.index.get_level_values("datetime")))[:10]
        # end_time = str(max(factor.index.get_level_values("datetime")))[:10]
        # date_range = list(
        #     map(pd.Timestamp, QA_util_get_trade_range(start_time, end_time))
        # )
        date_range = (factor.index.get_level_values(
            "datetime").drop_duplicates().tolist())

        df_local = pd.DataFrame()
        # Lazy map of per-date jqdatasdk queries, consumed exactly once
        # by the zip in the dict comprehension below
        industries = map(partial(jqdatasdk.get_industry, stock_list),
                         date_range)
        # {date: {code: industry_name or "NA"}}
        industries = {
            d: {
                s: ind.get(s).get(industry_cls,
                                  dict()).get("industry_name", "NA")
                for s in stock_list
            }
            for d, ind in zip(date_range, industries)
        }
    else:
        end_time = str(max(factor.index.get_level_values("datetime")))[:10]
        date_range = [pd.Timestamp(end_time)]
        industries = jqdatasdk.get_industry(stock_list, end_time)
        # Single-date mapping replicated over date_range (one element)
        industries = {
            d: {
                s: industries.get(s).get(industry_cls,
                                         dict()).get("industry_name", "NA")
                for s in stock_list
            }
            for d in date_range
        }
    # Stocks may lack industry info early in history; back-fill later
    # values to earlier dates (bfill on the unstacked frame below)
    df_local = pd.DataFrame(industries).T.sort_index()
    df_local.columns = df_local.columns.map(str).str.slice(0, 6)
    ss_local = df_local.stack(level=-1)
    ss_local.index.names = ["date", "code"]
    merged_data["date"] = merged_data.index.get_level_values("datetime").map(
        lambda x: x.date())
    # Temporarily re-index on (date, code) to align with ss_local, attach
    # the industry as "group", then restore the (datetime, code) index
    merged_data = (merged_data.reset_index().set_index([
        "date", "code"
    ]).assign(group=ss_local).reset_index().set_index(["datetime",
                                                       "code"]).drop("date",
                                                                     axis=1))
    group = merged_data["group"].unstack().bfill().stack()
    merged_data["group"] = group
    return merged_data
예제 #8
0
def QA_fetch_get_individual_financial(code: str,
                                      start: Union[str, datetime.datetime,
                                                   pd.Timestamp] = None,
                                      end: Union[str, datetime.datetime,
                                                 pd.Timestamp] = None,
                                      report_date: Union[
                                          str, datetime.datetime] = None,
                                      sheet_type: str = "income",
                                      report_type: Union[int, str] = 1,
                                      fields: Union[str, Tuple, List] = None,
                                      wait_seconds: int = 61,
                                      max_trial: int = 3) -> pd.DataFrame:
    """Network query of an individual stock's financial statements.

    Note: `start` and `end` bound the range of *report dates*.  If
    `report_date` is given it takes precedence and `start`/`end` are
    ignored.

    Args:
        code (str): stock code.
        start / end: report-date range bounds, optional.
        report_date: a single reporting period, optional.
        sheet_type (str): one of "income", "balancesheet", "cashflow",
            "forecast", "express".  Defaults to "income".
        report_type (Union[int, str]): tushare report type, 1..12
            (1 = latest consolidated statement).  Defaults to 1.
        fields: restrict returned columns; None returns all.
        wait_seconds (int): seconds to wait before retrying.  Default 61.
        max_trial (int): maximum retry count.  Default 3.

    Returns:
        pd.DataFrame: the requested statements for the stock.

    Raises:
        ValueError: when all of start/end/report_date are None, or when
            report_date / sheet_type / report_type are invalid, or when
            max_trial retries are exhausted.
    """
    def _get_individual_financial(code, report_date, report_type, sheet_type,
                                  fields, wait_seconds, trial_count):
        # One attempt against tushare; recurses on failure for retries.
        nonlocal pro, max_trial
        if trial_count >= max_trial:
            raise ValueError("[ERROR]\tEXCEED MAX TRIAL!")
        try:
            # NOTE: eval on a constructed string -- sheet_type is
            # validated against a whitelist by the caller below
            if not fields:
                df = eval(
                    f"pro.{sheet_type}(ts_code='{code}', period='{report_date}', report_type={report_type})"
                )
            else:
                df = eval(
                    f"pro.{sheet_type}(ts_code='{code}', period='{report_date}', report_type={report_type}, fields={fields})"
                )
            return df.rename(columns={
                "ts_code": "code",
                "end_date": "report_date"
            })
        except Exception as e:
            print(e)
            time.sleep(wait_seconds)
            # BUG FIX: return the retry's result; previously the value of
            # the recursive call was discarded and None propagated up
            return _get_individual_financial(code, report_date, report_type,
                                             sheet_type, fields, wait_seconds,
                                             trial_count + 1)

    pro = get_pro()
    report_type = int(report_type)
    if (not start) and (not end) and (not report_date):
        raise ValueError(
            "[QRY_DATES ERROR]\tparam 'start', 'end' and 'report_date' should not be none at the same time!"
        )
    if isinstance(fields, str):
        # Always include the identifying columns alongside the user field
        fields = sorted(
            list(
                set([
                    fields, "ts_code", "end_date", "ann_date", "f_ann_date",
                    "report_type", "update_flag"
                ])))
    if report_date:
        # Validate that report_date is a quarter-end reporting period
        report_date = pd.Timestamp(report_date)
        year = report_date.year
        report_date_lists = [
            pd.Timestamp(str(year) + report_date_tail)
            for report_date_tail in REPORT_DATE_TAILS
        ]
        if report_date not in report_date_lists:
            raise ValueError("[REPORT_DATE ERROR]")
        if sheet_type not in [
                "income", "balancesheet", "cashflow", "forecast", "express"
        ]:
            raise ValueError("[SHEET_TYPE ERROR]")
        if report_type not in range(1, 13):
            raise ValueError("[REPORT_TYPE ERROR]")
        report_dates = [report_date]
    else:
        # Expand [start, end] into the quarter-end dates it covers
        start = pd.Timestamp(start)
        start_year = start.year
        end = pd.Timestamp(end)
        end_year = end.year
        origin_year_ranges = pd.date_range(str(start_year),
                                           str(end_year + 1),
                                           freq='Y').map(str).str.slice(
                                               0, 4).tolist()
        origin_report_ranges = pd.Series([
            pd.Timestamp(year + report_date_tail)
            for year in origin_year_ranges
            for report_date_tail in REPORT_DATE_TAILS
        ])
        report_dates = origin_report_ranges.loc[
            (origin_report_ranges >= start) & (origin_report_ranges <= end)]
    # Accumulate per-period frames and concatenate once
    # (DataFrame.append was removed in pandas 2.0)
    frames = [
        _get_individual_financial(
            code=QA_fmt_code(code, "ts"),
            report_date=rd.strftime("%Y%m%d"),
            report_type=report_type,
            sheet_type=sheet_type,
            fields=fields,
            wait_seconds=wait_seconds,
            trial_count=0) for rd in report_dates
    ]
    df = pd.concat(frames) if frames else pd.DataFrame()
    df.code = QA_fmt_code_list(df.code)
    return df.reset_index(drop=True)
예제 #9
0
def QA_fetch_get_daily_basic(code: Union[str, List, Tuple] = None,
                             trade_date: Union[str, pd.Timestamp,
                                               datetime.datetime] = None,
                             fields: Union[str, List, Tuple] = None,
                             wait_seconds: int = 61,
                             max_trial: int = 3) -> pd.DataFrame:
    """
    Fetch the market's key daily fundamental indicators for a trading
    day, for stock screening and reporting.

    Args:
        code (Union[str, List, Tuple], optional): stock code(s); None
            means every stock on that trading day.
        trade_date (optional): the trading day; None means the most
            recent trading day before today.
        fields (optional): column selection; a single str still returns
            a DataFrame including the identifying columns.
        wait_seconds (int, optional): seconds between retries. Default 61.
        max_trial (int, optional): maximum retry count. Default 3.

    Returns:
        pd.DataFrame: daily fundamental indicators indexed by code.
    """
    def _fetch_get_daily_basic(trade_date, fields, trial_count):
        nonlocal pro, max_trial
        # BUG FIX: this guard used to sit inside the try block, so its
        # ValueError was swallowed by the bare except and the function
        # kept recursing past max_trial
        if trial_count >= max_trial:
            raise ValueError("[ERROR]\tEXCEED MAX TRIAL!")
        try:
            if not trade_date:
                # Default to the most recent completed trading day
                trade_date = QA_util_get_pre_trade_date(
                    datetime.date.today(), 1).replace("-", "")
            else:
                trade_date = pd.Timestamp(trade_date).strftime("%Y%m%d")
            if not fields:
                df = pro.daily_basic(trade_date=trade_date)
            else:
                if isinstance(fields, str):
                    # Always keep the identifying columns
                    fields = list(set([fields] + ["ts_code", "trade_date"]))
                fields = ",".join(fields)
                # BUG FIX: the old code eval'd a non-f-string template, so
                # the literal "{trade_date}" text reached the API; call
                # the client directly instead of using eval
                df = pro.daily_basic(trade_date=trade_date, fields=fields)
            if df is None:
                raise ValueError("[ERROR]")
            return df
        except Exception as e:
            print(e)
            time.sleep(wait_seconds)  # was a hard-coded 61
            # BUG FIX: return the retry's result instead of dropping it
            return _fetch_get_daily_basic(trade_date, fields, trial_count + 1)

    pro = get_pro()
    df = _fetch_get_daily_basic(trade_date=trade_date,
                                fields=fields,
                                trial_count=0)
    if df.empty:
        return df
    else:
        df = df.rename(columns={"ts_code": "code"})
        df.code = QA_fmt_code_list(df.code)
        df = df.set_index("code")
    if not code:
        return df
    if isinstance(code, str):
        code = (code, )
    # exclude codes not present in the returned frame
    filter_idx = df.index.intersection(code)
    return df.loc[filter_idx]
예제 #10
0
def QA_fetch_get_factor_groupby(
        factor: pd.Series,
        industry_cls: str = "sw_l1",
        detailed: bool = False
) -> pd.DataFrame:
    """
    Fetch the industry exposure of a factor.  Note: the return value is a
    pd.DataFrame containing the original factor values plus one extra
    column with each asset's industry (requires importing and logging in
    to the local jqdatasdk first).

    Parameters
    ---
    :param factor: factor values, indexed by ['datetime', 'code']
    :param industry_cls: industry classification, defaults to Shenwan level 1
    :param detailed: detailed mode; False (default) uses only the industry
        info of the factor's last date

    Returns
    ---
    :return: factor data, including factor values and their industry group
    """
    warnings.warn("请先自行导入聚宽本地 sdk 并登陆", UserWarning)
    # Normalize the factor representation
    factor = QA_fmt_factor(factor)
    merged_data = pd.DataFrame(factor.copy().rename("factor"))
    # Standardize the stock codes
    stock_list = QA_fmt_code_list(
        factor.index.get_level_values("code").drop_duplicates(),
    )
    # Non-detailed mode: industry data taken at the last factor date only
    ss = pd.Series()
    if detailed:
        # start_time = str(min(factor.index.get_level_values("datetime")))[:10]
        # end_time = str(max(factor.index.get_level_values("datetime")))[:10]
        # date_range = list(
        #     map(pd.Timestamp, QA_util_get_trade_range(start_time, end_time))
        # )
        date_range = (
            factor.index.get_level_values(
                "datetime").drop_duplicates().tolist()
        )

        # Query the local industry table once per factor date and build a
        # (date, code) -> industry_name series
        industry = pd.DataFrame()
        for cursor_date in date_range:
            df_tmp = QA_fetch_industry_adv(code = stock_list, cursor_date = cursor_date)[["code", "industry_name"]]
            df_tmp["date"] = cursor_date
            industry = industry.append(df_tmp)
        ss = industry.set_index(["date", "code"])["industry_name"]
        # industries = map(
        #     partial(jqdatasdk.get_industry,
        #             stock_list),
        #     date_range
        # )
        # industries = {
        #     d: {
        #         s: ind.get(s).get(industry_cls,
        #                           dict()).get("industry_name",
        #                                       "NA")
        #         for s in stock_list
        #     }
        #     for d,
        #     ind in zip(date_range,
        #                industries)
        # }
    else:
        end_time = str(max(factor.index.get_level_values("datetime")))[:10]
        date_range = [pd.Timestamp(end_time)]
        # industries = jqdatasdk.get_industry(stock_list, end_time)
        # NOTE(review): only ["code", "industry_name"] are selected before
        # set_index(["date", "code"]) -- a "date" column may be missing at
        # that point; confirm QA_fetch_industry_adv's return schema
        ss = QA_fetch_industry_adv(stock_list, end_time)[["code", "industry_name"]].set_index(["date", "code"])["industry_name"]
        # industries = {
        #     d: {
        #         s: industries.get(s).get(industry_cls,
        #                                  dict()).get("industry_name",
        #                                              "NA")
        #         for s in stock_list
        #     }
        #     for d in date_range
        # }
    # Stocks may lack industry info early in history; back-fill later
    # values to earlier dates (bfill on the unstacked frame below)
    merged_data["date"] = merged_data.index.get_level_values("datetime").map(
        lambda x: x.date()
    )
    # Temporarily re-index on (date, code) to align with ss, attach the
    # industry as "group", then restore the (datetime, code) index
    merged_data = (
        merged_data.reset_index().set_index(
            ["date",
             "code"]
        ).assign(group=ss).reset_index().set_index(["datetime",
                                                          "code"]
                                                         ).drop("date",
                                                                axis=1)
    )
    group = merged_data["group"].unstack().bfill().stack()
    merged_data["group"] = group
    return merged_data