Example #1
0
def QA_fetch_daily_basic(
        code: Union[str, List, Tuple] = None,
        start: Union[str, pd.Timestamp, datetime.datetime] = None,
        end: Union[str, pd.Timestamp, datetime.datetime] = None,
        cursor_date: Union[str, pd.Timestamp, datetime.datetime] = None,
        fields: Union[str, Tuple, List] = None) -> pd.DataFrame:
    """获取全部股票每日重要的基本面指标,可用于选股分析、报表展示等

    Args:
        code (Union[str, List, Tuple], optional): 指定股票代码或列表,  默认为 None,获取全市场
        start (Union[str, pd.Timestamp, datetime.datetime], optional): 起始日期,默认为 None
        end (Union[str, pd.Timestamp, datetime.datetime], optional): 结束日期,默认为 None
        cursor_date (Union[str, pd.Timestamp, datetime.datetime], optional): 指定日期,与 start 和 end 冲突,只能选择 cursor_date
           或者 start, end
        fields (Union[str, Tuple, List], optional): 指定 fields

    Returns:
        pd.DataFrame: 以日期,股票名为 Multiindex 的基本信息
    """
    if isinstance(code, str):
        code = (code, )
    if not code:
        if (not start) and (not cursor_date):
            raise ValueError(
                "[ERROR]\tstart and end and cursor_date cannot all be none!")
        if not cursor_date:
            if not end:
                end_stamp = QA_util_date_stamp(datetime.date.today())
            else:
                end_stamp = QA_util_date_stamp(end)
            start_stamp = QA_util_date_stamp(start)
            qry = {
                "trade_date_stamp": {
                    "$gte": start_stamp,
                    "$lte": end_stamp
                }
            }
        else:
            real_trade_date = QA_util_get_real_date(cursor_date)
            trade_date_stamp = QA_util_date_stamp(real_trade_date)
            qry = {"trade_date_stamp": trade_date_stamp}
    else:
        if (not start) and (not cursor_date):
            raise ValueError(
                "[ERROR]\tstart and end and cursor_date cannot all be none!")
        if not cursor_date:
            if not end:
                end_stamp = QA_util_date_stamp(datetime.date.today())
            else:
                end_stamp = QA_util_date_stamp(end)
            start_stamp = QA_util_date_stamp(start)
            qry = {
                "code": {
                    "$in": code
                },
                "trade_date_stamp": {
                    "$gte": start_stamp,
                    "$lte": end_stamp
                }
            }
        else:
            real_trade_date = QA_util_get_real_date(cursor_date)
            trade_date_stamp = QA_util_date_stamp(real_trade_date)
            qry = {"code": {"$in": code}, "trade_date_stamp": trade_date_stamp}
    coll = DATABASE.daily_basic
    cursor = coll.find(qry)
    df = pd.DataFrame(cursor)
    if df.empty:
        return df
    df = df.rename(columns={"trade_date": "date"}).drop(columns="_id")
    df.date = pd.to_datetime(df.date, utc=False)
    df = df.set_index(["date", "code"]).sort_index()
    if not fields:
        return df
    return df[fields]
Example #2
0
def QA_fetch_last_financial(
        code: Union[str, List, Tuple] = None,
        cursor_date: Union[str, datetime.datetime, pd.Timestamp] = None,
        report_label: Union[int, str] = None,
        report_type: Union[int, str, List, Tuple] = None,
        sheet_type: str = "income",
        fields: Union[str, List, Tuple] = None) -> pd.DataFrame:
    """获取距离指定日期 (cursor_date) 最近的原始数据 (不包含在 cursor_date 发布的财务数据),
       当同时输入 cursor_date 与 report_date 时,以 report_date 作为查询标准
       注意:
           这里的 report_type 仅支持 (1,4, 5) 三种类型,以避免混淆合并数据和单季数据等
       说明:
           柳工 (000528) 在 2018 年 8 月 30 日发布半年报,之后在 2018 年 9 月 29 日发布修正报告,
           - 如果输入的 cursor_date 为 2018-08-31, 那么获取到的就是原始半年报,对应 report_type == 5
           - 如果输入的 cursor_date 为 2018-09-30,那么获取到的就是最新合并报表,对应 report_type == 1
           - 如果对应的 cursor_date 为 2019-08-31,需要获取 2018 年半年报,那么就返回柳工在 2019 年 8 月 29 日发布的上年同期基准,对应 report_type == 4

    Args:
        code (Union[str, List, Tuple], optional): 股票代码或股票列表,默认为 None, 查询所有股票
        cursor_date (Union[str, datetime.datetime, pd.Timestamp]): 查询截面日期 (一般指调仓日), 默认为 None
        report_label (Union[str, int], optional): 指定报表类型,这里的类型分类为一季报,半年报,三季报,年报, 默认为 None,即选择距离 cursor_date 最近的报表类型
        report_type (Union[str, List, Tuple], optional): [description]. 报表类型,默认为 None. 即距离 cursor_date 最近的财报,不指定类型,避免引入未来数据
            (1	合并报表	上市公司最新报表(默认)|
             2  单季合并报表
             4	调整合并报表	本年度公布上年同期的财务报表数据,报告期为上年度 |
             5	调整前合并报表	数据发生变更,将原数据进行保留,即调整前的原数据)
        sheet_type (str, optional): 报表类型,默认为 "income".
        fields (Union[str, List, Tuple], optional): 字段, 默认为 None, 返回所有字段

    Returns:
        pd.DataFrame: 复合条件的财务数据
    """
    def _trans_financial_type(x):
        if x.empty:
            return x
        if sheet_type == "balancesheet":
            # 资产负债表属于时点信息,直接返回
            return x
        else:
            if x.iloc[0].report_date[4:] in ['0331', '1231']:
                # 一季报而言,单季合并与普通合并没有区别,直接返回
                # 年报而言,不存在单季概念
                return x.iloc[0]
            if x.iloc[0].report_type in ['1', '4', '5']:
                return x.iloc[0]
            if x.iloc[0].report_type == '2':
                # 尝试查找同一报告期报告类型为 '1' 或 '4' 的报表数据
                # try:
                #     if (x.shape[0] > 1) & (x.iloc[1].report_date == x.iloc[0].report_date) & (x.iloc[1].report_type in ['1', '4']):
                #         return x.iloc[1]
                # except:
                #     return pd.Series()
                # 尝试直接利用单季数据进行拼接
                cursor_x = x.loc[x.report_date.map(str).str.slice(0, 4) ==
                                 x.iloc[0].report_date[:4]]
                cursor_x = cursor_x.drop_duplicates(subset=['report_date'],
                                                    keep='first')
                cursor_x = cursor_x.loc[
                    cursor_x.report_date <= x.iloc[0].report_date]
                cursor_x = cursor_x.fillna(0)
                non_numeric_columns = sorted([
                    "f_ann_date", "f_ann_date_stamp", "ann_date",
                    "ann_date_stamp", "report_date", "report_date_stamp",
                    "update_flag", "report_type", "code", "report_label"
                ])
                columns = sorted(
                    list(set(cursor_x.columns) - set(non_numeric_columns)))
                rtn_se = cursor_x[columns].sum(axis=0)
                rtn_se = rtn_se.append(cursor_x[non_numeric_columns].iloc[0])
                return rtn_se

    if isinstance(code, str):
        code = (code, )
    if not report_type:
        report_type = ["1", "2", "4", "5"]
    else:
        if isinstance(report_type, int):
            report_type = str(report_type)
        if isinstance(report_type, str):
            if report_type not in ["1", "4", "5"]:
                raise ValueError("[REPORT_TYPE ERROR]")
            report_type = (report_type, )
        else:
            report_type = list(set(report_type) & set('1', '2', '4', '5'))

    if sheet_type not in SHEET_TYPE:
        raise ValueError(f"[SHEET_TYPE ERROR]")
    if report_label:
        report_label = str(report_label)

    if isinstance(fields, str):
        fields = list(
            set([
                fields, "code", "ann_date", "report_date", "f_ann_date",
                "report_type"
            ]))
    elif fields:
        fields = list(
            set(fields + [
                "code", "ann_date", "report_date", "f_ann_date", "report_type"
            ]))

    coll = eval(f"DATABASE.{sheet_type}")
    if (not code) and (not report_label):
        # 为了加快检索速度,从当前日期往前至多回溯一季度,实际调仓时,仅考虑当前能拿到的最新数据,调仓周期一般以月, 季为单位,
        # 最长一般为年报,而修正报表如果超过 1 个季度,基本上怼调仓没有影响,这里以 1 年作为回溯基准
        qry = {
            "f_ann_date_stamp": {
                "$gt":
                QA_util_date_stamp(
                    (pd.Timestamp(cursor_date) -
                     pd.Timedelta(days=400)).strftime("%Y-%m-%d")),
                "$lt":
                QA_util_date_stamp(cursor_date)
            },
            "report_type": {
                "$in": report_type
            }
        }
        cursor = coll.find(qry, batch_size=10000).sort([
            ("report_date_stamp", pymongo.DESCENDING),
            ("f_ann_date_stamp", pymongo.DESCENDING)
        ])
        try:
            if not fields:
                df = pd.DataFrame(cursor).drop(columns="_id")
            else:
                df = pd.DataFrame(cursor).drop(columns="_id")[fields]
        except:
            raise ValueError("[QRY ERROR]")
        if sheet_type == "balancesheet":
            return df.groupby("code").apply(lambda x: x.iloc[0])
        return df.groupby("code").apply(_trans_financial_type).unstack()
    if not report_label:
        qry = {
            "code": {
                "$in": code
            },
            "f_ann_date_stamp": {
                "$gt":
                QA_util_date_stamp(
                    (pd.Timestamp(cursor_date) -
                     pd.Timedelta(days=400)).strftime("%Y-%m-%d")),
                "$lt":
                QA_util_date_stamp(cursor_date)
            },
            "report_type": {
                "$in": report_type
            }
        }
        cursor = coll.find(qry, batch_size=10000).sort([
            ("report_date_stamp", pymongo.DESCENDING),
            ("f_ann_date_stamp", pymongo.DESCENDING)
        ])
        try:
            if not fields:
                df = pd.DataFrame(cursor).drop(columns="_id")
            else:
                df = pd.DataFrame(cursor).drop(columns="_id")[fields]
        except:
            raise ValueError("[QRY ERROR]")
        if sheet_type == "balancesheet":
            return df.groupby("code").apply(lambda x: x.iloc[0])
        return df.groupby("code").apply(_trans_financial_type).unstack()
    if not code:
        qry = {
            "f_ann_date_stamp": {
                "$gt":
                QA_util_date_stamp(
                    (pd.Timestamp(cursor_date) -
                     pd.Timedelta(days=400)).strftime("%Y-%m-%d")),
                "$lt":
                QA_util_date_stamp(cursor_date)
            },
            "report_type": {
                "$in": report_type
            },
            "report_label": report_label
        }
        cursor = coll.find(qry, batch_size=10000).sort([
            ("report_date_stamp", pymongo.DESCENDING),
            ("f_ann_date_stamp", pymongo.DESCENDING)
        ])
        try:
            if not fields:
                df = pd.DataFrame(cursor).drop(columns="_id")
            else:
                df = pd.DataFrame(cursor).drop(columns="_id")[fields]
        except:
            raise ValueError("[QRY ERROR]")
        if sheet_type == "balancesheet":
            return df.groupby("code").apply(lambda x: x.iloc[0])
        return df.groupby("code").apply(_trans_financial_type).unstack()
    else:
        qry = {
            "code": {
                "$in": code
            },
            "f_ann_date_stamp": {
                "$gt":
                QA_util_date_stamp(
                    (pd.Timestamp(cursor_date) -
                     pd.Timedelta(days=400)).strftime("%Y-%m-%d")),
                "$lt":
                QA_util_date_stamp(cursor_date)
            },
            "report_type": {
                "$in": report_type
            },
            "report_label": report_label
        }
        cursor = coll.find(qry, batch_size=10000).sort([
            ("report_date_stamp", pymongo.DESCENDING),
            ("f_ann_date_stamp", pymongo.DESCENDING)
        ])
        try:
            if not fields:
                df = pd.DataFrame(cursor).drop(columns="_id")
            else:
                df = pd.DataFrame(cursor).drop(columns="_id")[fields]
        except:
            raise ValueError("[QRY ERROR]")
        # df.report_date = pd.to_datetime(df.report_date, utc=False)
        # df.ann_date = pd.to_datetime(df.ann_date, utc=False)
        # df.f_ann_date = pd.to_datetime(df.f_ann_date, utc=False)
        if sheet_type == "balancesheet":
            return df.groupby("code").apply(lambda x: x.iloc[0])
        return df.groupby("code").apply(_trans_financial_type).unstack()
Example #3
0
def QA_fetch_industry_adv(code: Union[str, List, Tuple] = None,
                          cursor_date: Union[str, datetime.datetime] = None,
                          start: Union[str, datetime.datetime] = None,
                          end: Union[str, datetime.datetime] = None,
                          levels: Union[str, List, Tuple] = None,
                          src: str = "sw") -> pd.DataFrame:
    """本地获取指定股票或股票列表的行业

    Args:
        code (Union[str, List, Tuple], optional): 股票代码或列表,默认为 None, 查询所有股票代码.
        cursor_date (Union[str, datetime.datetime], optional): 一般指调仓日,此时不需要再设置 start 与 end
        start(Union[str, datetime.datetime], optional): 起始时间,默认为 None.
        end(Union[str, datetime.datetime], optional): 截止时间, 默认为 None.
        levels (Union[str, List, Tuple], optional): [description]. 对应行业分级级别,默认为 None,查询所有行业分级数据
        src (str, optional): 分级来源,默认为 "sw"(目前仅支持申万行业分类).

    Returns:
        pd.DataFrame: 行业信息
    """
    coll = DATABASE.industry
    if not code:
        code = QA_fetch_stock_list().index.tolist()
    if isinstance(code, str):
        code = [code]
    if isinstance(levels, str):
        levels = [
            levels,
        ]
    if not levels:
        levels = ["l1", "l2", "l3"]
    levels = list(map(lambda x: x.lower(), levels))
    df_tmp = pd.DataFrame()
    if not cursor_date:
        if not start:
            qry = {
                "code": {
                    "$in": code
                },
                "level": {
                    "$in": levels
                },
                "src": src.lower()
            }
        else:
            qry = {
                "code": {
                    "$in": code
                },
                "level": {
                    "$in": levels
                },
                "src": src.lower(),
                "in_date_stamp": {
                    "$lte":
                    QA_util_date_stamp(
                        pd.Timestamp(start).strftime("%Y-%m-%d"))
                }
            }
        if coll.count_documents(filter=qry) < 1:
            print("找不到对应行业数据")
            return pd.DataFrame()
        cursor = coll.find(qry)
        df_tmp = pd.DataFrame(cursor).drop(columns="_id")
        if end:
            df_tmp = df_tmp.loc[df_tmp.out_date_stamp > QA_util_date_stamp(
                pd.Timestamp(end).strftime("%Y-%m-%d"))]
    else:
        qry = {
            "code": {
                "$in": code
            },
            "level": {
                "$in": levels
            },
            "src": src.lower(),
            "in_date_stamp": {
                "$lte":
                QA_util_date_stamp(
                    pd.Timestamp(cursor_date).strftime("%Y-%m-%d"))
            }
        }
        if coll.count_documents(filter=qry) < 1:
            print("找不到对应行业数据")
            return pd.DataFrame()
        cursor = coll.find(qry)
        df_tmp = pd.DataFrame(cursor).drop(columns="_id")
        df_tmp.loc[df_tmp.out_date_stamp > QA_util_date_stamp(
            pd.Timestamp(cursor_date).strftime("%Y-%m-%d"))]
        df_tmp.in_date = pd.to_datetime(df_tmp.in_date, utc=False)
        df_tmp.out_date = pd.to_datetime(df_tmp.out_date, utc=False)
    return df_tmp.drop(columns=["in_date_stamp", "out_date_stamp"])
Example #4
0
def QA_fetch_financial_adv(
        code: Union[str, Tuple, List] = None,
        start: Union[str, datetime.datetime, pd.Timestamp] = None,
        end: Union[str, datetime.datetime, pd.Timestamp] = None,
        report_date: Union[str, datetime.datetime, pd.Timestamp] = None,
        report_type: Union[int, str] = None,
        sheet_type: str = "income",
        fields: Union[str, Tuple, List] = None) -> pd.DataFrame:
    """本地获取指定股票或者指定股票列表,指定时间范围或者报告期,指定报告类型的指定财务报表数据

    Args:
        code (Union[str, Tuple, List], optional): 指定股票代码或列表,默认为 None, 全市场股票
        start (Union[str, datetime.datetime, pd.Timestamp], optional): 起始时间
        end (Union[str, datetime.datetime, pd.Timestamp], optional): 结束时间
        report_date (Union[str, datetime.datetime, pd.Timestamp], optional): 报告期
        report_type (Union[int, str], optional): 报告类型,默认为 1.
            (1	合并报表	上市公司最新报表(默认)|
             2	单季合并	单一季度的合并报表 |
             3	调整单季合并表	调整后的单季合并报表(如果有) |
             4	调整合并报表	本年度公布上年同期的财务报表数据,报告期为上年度 |
             5	调整前合并报表	数据发生变更,将原数据进行保留,即调整前的原数据 |
             11 调整前合并报表	调整之前合并报表原数据)
        sheet_type (str, optional): 报表类型,默认为 "income".
        fields (List, optional): 字段,默认为 None,返回所有字段.

    Returns:
        pd.DataFrame: 指定条件的本地报表数据
    """
    if (not start) and (not end) and (not report_date):
        raise ValueError(
            "[DATE ERROR]\t 'start', 'end' 与 'report_date' 不能同时为 None")
    if isinstance(code, str):
        code = (code, )
    if not report_type:
        report_type = ("1", "2", "4", "5", "11")
    if isinstance(report_type, int) or isinstance(report_type, str):
        report_type = (str(report_type), )
    else:
        report_type = list(map(str, report_type))

    coll = eval(f"DATABASE.{sheet_type}")
    qry = {}
    if not report_date:
        if not end:
            end = datetime.date.today()
        start = pd.Timestamp(start)
        end = pd.Timestamp(end)
        start_date_stamp = QA_util_date_stamp(start)
        end_date_stamp = QA_util_date_stamp(end)
        if not code:
            qry = {
                "f_ann_date_stamp": {
                    "$gte": start_date_stamp,
                    "$lte": end_date_stamp
                },
                "report_type": {
                    "$in": report_type
                }
            }
        else:
            qry = {
                "code": {
                    "$in": code
                },
                "f_ann_date_stamp": {
                    "$gte": start_date_stamp,
                    "$lte": end_date_stamp
                },
                "report_type": {
                    "$in": report_type
                }
            }
    else:
        report_date_stamp = QA_util_date_stamp(report_date)
        if not code:
            qry = {
                "report_date_stamp": report_date_stamp,
                "report_type": {
                    "$in": report_type
                }
            }
        else:
            qry = {
                "code": {
                    "$in": code
                },
                "report_date_stamp": report_date_stamp,
                "report_type": {
                    "$in": report_type
                }
            }
    if isinstance(fields, str):
        fields = list(
            set([fields, "code", "ann_date", "report_date", "f_ann_date"]))
    elif fields:
        fields = list(
            set(
                list(fields) +
                ["code", "ann_date", "report_date", "f_ann_date"]))

    cursor = coll.find(qry, batch_size=10000).sort([
        ("report_date_stamp", pymongo.ASCENDING),
        ("f_ann_date_stamp", pymongo.ASCENDING)
    ])
    if fields:
        df = pd.DataFrame(cursor).drop(columns="_id")[fields].set_index("code")
        df.report_date = pd.to_datetime(df.report_date, utc=False)
        df.ann_date = pd.to_datetime(df.ann_date, utc=False)
        df.f_ann_date = pd.to_datetime(df.f_ann_date, utc=False)
    else:
        df = pd.DataFrame(cursor).drop(columns="_id").set_index("code")
        df.report_date = pd.to_datetime(df.report_date, utc=False)
        df.ann_date = pd.to_datetime(df.ann_date, utc=False)
        df.f_ann_date = pd.to_datetime(df.f_ann_date, utc=False)
    return df
Example #5
0
def QA_fetch_last_financial(
        code: Union[str, List, Tuple] = None,
        cursor_date: Union[str, datetime.datetime, pd.Timestamp] = None,
        report_label: Union[int, str] = None,
        report_type: Union[int, str, List, Tuple] = None,
        sheet_type: str = "income",
        fields: Union[str, List, Tuple] = None) -> pd.DataFrame:
    """获取距离指定日期 (cursor_date) 最近的原始数据 (不包含在 cursor_date 发布的财务数据),
       当同时输入 cursor_date 与 report_date 时,以 report_date 作为查询标准
       注意:
           这里的 report_type 仅支持 (1,4, 5) 三种类型,以避免混淆合并数据和单季数据等
       说明:
           柳工 (000528) 在 2018 年 8 月 30 日发布半年报,之后在 2018 年 9 月 29 日发布修正报告,
           - 如果输入的 cursor_date 为 2018-08-31, 那么获取到的就是原始半年报,对应 report_type == 5
           - 如果输入的 cursor_date 为 2018-09-30,那么获取到的就是最新合并报表,对应 report_type == 1
           - 如果对应的 cursor_date 为 2019-08-31,需要获取 2018 年半年报,那么就返回柳工在 2019 年 8 月 29 日发布的上年同期基准,对应 report_type == 4

    Args:
        code (Union[str, List, Tuple], optional): 股票代码或股票列表,默认为 None, 查询所有股票
        cursor_date (Union[str, datetime.datetime, pd.Timestamp]): 查询截面日期 (一般指调仓日), 默认为 None
        report_label (Union[str, int], optional): 指定报表类型,这里的类型分类为一季报,半年报,三季报,年报, 默认为 None,即选择距离 cursor_date 最近的报表类型
        report_type (Union[str, List, Tuple], optional): [description]. 报表类型,默认为 None. 即距离 cursor_date 最近的财报,不指定类型,避免引入未来数据
            (1	合并报表	上市公司最新报表(默认)|
             4	调整合并报表	本年度公布上年同期的财务报表数据,报告期为上年度 |
             5	调整前合并报表	数据发生变更,将原数据进行保留,即调整前的原数据)
        sheet_type (str, optional): 报表类型,默认为 "income".
        fields (Union[str, List, Tuple], optional): 字段, 默认为 None, 返回所有字段

    Returns:
        pd.DataFrame: 复合条件的财务数据
    """
    if isinstance(code, str):
        code = (code, )
    if not report_type:
        report_type = ["1", "4", "5"]
    else:
        if isinstance(report_type, int):
            report_type = str(report_type)
        if isinstance(report_type, str):
            if report_type not in ["1", "4", "5"]:
                raise ValueError("[REPORT_TYPE ERROR]")
            report_type = (report_type, )
        else:
            report_type = list(set(report_type) & set('1', '4', '5'))

    if sheet_type not in SHEET_TYPE:
        raise ValueError(f"[SHEET_TYPE ERROR]")
    if report_label:
        report_label = str(report_label)

    if isinstance(fields, str):
        fields = list(
            set([fields, "code", "ann_date", "report_date", "f_ann_date"]))
    elif fields:
        fields = list(
            set(fields + ["code", "ann_date", "report_date", "f_ann_date"]))

    coll = eval(f"DATABASE.{sheet_type}")
    if (not code) and (not report_label):
        # 为了加快检索速度,从当前日期往前至多回溯一季度,实际调仓时,仅考虑当前能拿到的最新数据,调仓周期一般以月, 季为单位,
        # 最长一般为年报,而修正报表如果超过 1 个季度,基本上怼调仓没有影响,这里以 1 年作为回溯基准
        qry = {
            "f_ann_date_stamp": {
                "$gt":
                QA_util_date_stamp(
                    (pd.Timestamp(cursor_date) -
                     pd.Timedelta(days=400)).strftime("%Y-%m-%d")),
                "$lt":
                QA_util_date_stamp(cursor_date)
            },
            "report_type": {
                "$in": report_type
            }
        }
        cursor = coll.find(qry, batch_size=10000).sort([
            ("report_date_stamp", pymongo.DESCENDING),
            ("f_ann_date_stamp", pymongo.DESCENDING)
        ])
        try:
            if not fields:
                df = pd.DataFrame(cursor).drop(columns="_id")
            else:
                df = pd.DataFrame(cursor).drop(columns="_id")[fields]
        except:
            raise ValueError("[QRY ERROR]")
        return df.groupby("code").apply(lambda x: x.iloc[0])
    if not report_label:
        qry = {
            "code": {
                "$in": code
            },
            "f_ann_date_stamp": {
                "$gt":
                QA_util_date_stamp(
                    (pd.Timestamp(cursor_date) -
                     pd.Timedelta(days=400)).strftime("%Y-%m-%d")),
                "$lt":
                QA_util_date_stamp(cursor_date)
            },
            "report_type": {
                "$in": report_type
            }
        }
        cursor = coll.find(qry, batch_size=10000).sort([
            ("report_date_stamp", pymongo.DESCENDING),
            ("f_ann_date_stamp", pymongo.DESCENDING)
        ])
        try:
            if not fields:
                df = pd.DataFrame(cursor).drop(columns="_id")
            else:
                df = pd.DataFrame(cursor).drop(columns="_id")[fields]
        except:
            raise ValueError("[QRY ERROR]")
        return df.groupby("code").apply(lambda x: x.iloc[0])
    if not code:
        qry = {
            "f_ann_date_stamp": {
                "$gt":
                QA_util_date_stamp(
                    (pd.Timestamp(cursor_date) -
                     pd.Timedelta(days=400)).strftime("%Y-%m-%d")),
                "$lt":
                QA_util_date_stamp(cursor_date)
            },
            "report_type": {
                "$in": report_type
            },
            "report_label": report_label
        }
        cursor = coll.find(qry, batch_size=10000).sort([
            ("report_date_stamp", pymongo.DESCENDING),
            ("f_ann_date_stamp", pymongo.DESCENDING)
        ])
        try:
            if not fields:
                df = pd.DataFrame(cursor).drop(columns="_id")
            else:
                df = pd.DataFrame(cursor).drop(columns="_id")[fields]
        except:
            raise ValueError("[QRY ERROR]")
        return df.groupby("code").apply(lambda x: x.iloc[0])
    else:
        qry = {
            "code": {
                "$in": code
            },
            "f_ann_date_stamp": {
                "$gt":
                QA_util_date_stamp(
                    (pd.Timestamp(cursor_date) -
                     pd.Timedelta(days=400)).strftime("%Y-%m-%d")),
                "$lt":
                QA_util_date_stamp(cursor_date)
            },
            "report_type": {
                "$in": report_type
            },
            "report_label": report_label
        }
        cursor = coll.find(qry, batch_size=10000).sort([
            ("report_date_stamp", pymongo.DESCENDING),
            ("f_ann_date_stamp", pymongo.DESCENDING)
        ])
        try:
            if not fields:
                df = pd.DataFrame(cursor).drop(columns="_id")
            else:
                df = pd.DataFrame(cursor).drop(columns="_id")[fields]
        except:
            raise ValueError("[QRY ERROR]")
        df.report_date = pd.to_datetime(df.report_date)
        df.ann_date = pd.to_datetime(df.ann_date)
        df.f_ann_date = pd.to_datetime(df.f_ann_date)
        return df.groupby("code").apply(lambda x: x.iloc[0])
Example #6
0
def QA_fetch_get_index_min(code,
                           start,
                           end,
                           frequence='1min',
                           ip=best_ip['stock'],
                           port=7709):
    '指数分钟线'
    api = TdxHq_API()
    type_ = ''

    start_date = str(start)[0:10]
    today_ = datetime.date.today()
    lens = QA_util_get_trade_gap(start_date, today_)
    if str(frequence) in ['5', '5m', '5min', 'five']:
        frequence, type_ = 0, '5min'
        lens = 48 * lens
    elif str(frequence) in ['1', '1m', '1min', 'one']:
        frequence, type_ = 8, '1min'
        lens = 240 * lens
    elif str(frequence) in ['15', '15m', '15min', 'fifteen']:
        frequence, type_ = 1, '15min'
        lens = 16 * lens
    elif str(frequence) in ['30', '30m', '30min', 'half']:
        frequence, type_ = 2, '30min'
        lens = 8 * lens
    elif str(frequence) in ['60', '60m', '60min', '1h']:
        frequence, type_ = 3, '60min'
        lens = 4 * lens

    if lens > 20800:
        lens = 20800
    with api.connect(ip, port):

        if str(code)[0] in ['5', '1']:  # ETF
            data = pd.concat([
                api.to_df(
                    api.get_security_bars(
                        frequence,
                        1 if str(code)[0] in ['0', '8', '9', '5'] else 0, code,
                        (int(lens / 800) - i) * 800, 800))
                for i in range(int(lens / 800) + 1)
            ],
                             axis=0)
        else:
            data = pd.concat([
                api.to_df(
                    api.get_index_bars(
                        frequence,
                        1 if str(code)[0] in ['0', '8', '9', '5'] else 0, code,
                        (int(lens / 800) - i) * 800, 800))
                for i in range(int(lens / 800) + 1)
            ],
                             axis=0)
        data = data\
            .assign(datetime=pd.to_datetime(data['datetime']), code=str(code))\
            .drop(['year', 'month', 'day', 'hour', 'minute'], axis=1, inplace=False)\
            .assign(date=data['datetime'].apply(lambda x: str(x)[0:10]))\
            .assign(date_stamp=data['datetime'].apply(lambda x: QA_util_date_stamp(x)))\
            .assign(time_stamp=data['datetime'].apply(lambda x: QA_util_time_stamp(x)))\
            .assign(type=type_).set_index('datetime', drop=False, inplace=False)[start:end]
        # data
        return data.assign(datetime=data['datetime'].apply(lambda x: str(x)))
Example #7
0
def QA_fetch_get_stock_day(code,
                           start_date,
                           end_date,
                           if_fq='00',
                           frequence='day',
                           ip=best_ip['stock'],
                           port=7709):
    """获取日线及以上级别的数据


    Arguments:
        code {str:6} -- code 是一个单独的code 6位长度的str
        start_date {str:10} -- 10位长度的日期 比如'2017-01-01'
        end_date {str:10} -- 10位长度的日期 比如'2018-01-01'

    Keyword Arguments:
        if_fq {str} -- '00'/'bfq' -- 不复权 '01'/'qfq' -- 前复权 '02'/'hfq' -- 后复权 '03'/'ddqfq' -- 定点前复权 '04'/'ddhfq' --定点后复权
        frequency {str} -- day/week/month/quarter/year 也可以是简写 D/W/M/Q/Y
        ip {str} -- [description] (default: best_ip['stock']) ip可以通过select_best_ip()函数重新获取
        port {int} -- [description] (default: {7709})


    Returns:
        pd.DataFrame/None -- 返回的是dataframe,如果出错比如只获取了一天,而当天停牌,返回None

    Exception:
        如果出现网络问题/服务器拒绝, 会出现socket:time out 尝试再次获取/更换ip即可, 本函数不做处理
    """

    api = TdxHq_API()
    with api.connect(ip, port, time_out=0.7):

        if frequence in ['day', 'd', 'D', 'DAY', 'Day']:
            frequence = 9
        elif frequence in ['w', 'W', 'Week', 'week']:
            frequence = 5
        elif frequence in ['month', 'M', 'm', 'Month']:
            frequence = 6
        elif frequence in ['quarter', 'Q', 'Quarter', 'q']:
            frequence = 10
        elif frequence in ['y', 'Y', 'year', 'Year']:
            frequence = 11
        start_date = str(start_date)[0:10]
        today_ = datetime.date.today()
        lens = QA_util_get_trade_gap(start_date, today_)

        data = pd.concat([
            api.to_df(
                api.get_security_bars(frequence, _select_market_code(code),
                                      code, (int(lens / 800) - i) * 800, 800))
            for i in range(int(lens / 800) + 1)
        ],
                         axis=0)

        # 这里的问题是: 如果只取了一天的股票,而当天停牌, 那么就直接返回None了
        if len(data) < 1:
            return None
        data = data[data['open'] != 0]

        if if_fq in ['00', 'bfq']:
            data = data.assign(date=data['datetime'].apply(lambda x: str(x[0:10]))).assign(code=str(code))\
                .assign(date_stamp=data['datetime'].apply(lambda x: QA_util_date_stamp(str(x)[0:10]))).set_index('date', drop=False, inplace=False)

            return data.drop(
                ['year', 'month', 'day', 'hour', 'minute', 'datetime'],
                axis=1)[start_date:end_date].assign(
                    date=data['date'].apply(lambda x: str(x)[0:10]))

        elif if_fq in ['01', 'qfq']:

            xdxr_data = QA_fetch_get_stock_xdxr(code)
            bfq_data = data.assign(date=pd.to_datetime(data['datetime'].apply(lambda x: str(x[0:10])))).assign(code=str(code))\
                .assign(date_stamp=data['datetime'].apply(lambda x: QA_util_date_stamp(str(x)[0:10]))).set_index('date', drop=False, inplace=False)
            bfq_data = bfq_data.drop(
                ['year', 'month', 'day', 'hour', 'minute', 'datetime'], axis=1)
            #
            if xdxr_data is not None:
                info = xdxr_data[xdxr_data['category'] == 1]
                bfq_data['if_trade'] = True
                data = pd.concat(
                    [bfq_data, info[['category']][bfq_data.index[0]:]], axis=1)

                data['date'] = data.index
                data['if_trade'].fillna(value=False, inplace=True)
                data = data.fillna(method='ffill')
                data = pd.concat([
                    data, info[['fenhong', 'peigu', 'peigujia', 'songzhuangu'
                                ]][bfq_data.index[0]:]
                ],
                                 axis=1)
                data = data.fillna(0)

                data['preclose'] = (
                    data['close'].shift(1) * 10 - data['fenhong'] +
                    data['peigu'] * data['peigujia']) / (10 + data['peigu'] +
                                                         data['songzhuangu'])
                data['adj'] = (data['preclose'].shift(-1) /
                               data['close']).fillna(1)[::-1].cumprod()
                data['open'] = data['open'] * data['adj']
                data['high'] = data['high'] * data['adj']
                data['low'] = data['low'] * data['adj']
                data['close'] = data['close'] * data['adj']
                data['preclose'] = data['preclose'] * data['adj']

                data = data[data['if_trade']]
                return data.drop(
                    [
                        'fenhong', 'peigu', 'peigujia', 'songzhuangu',
                        'if_trade', 'category'
                    ],
                    axis=1)[data['open'] != 0].assign(date=data['date'].apply(
                        lambda x: str(x)[0:10]))[start_date:end_date]
            else:

                bfq_data['preclose'] = bfq_data['close'].shift(1)
                bfq_data['adj'] = 1
                return bfq_data[start_date:end_date]
        elif if_fq in ['03', 'ddqfq']:
            xdxr_data = QA_fetch_get_stock_xdxr(code)

            info = xdxr_data[xdxr_data['category'] == 1]

            bfq_data = data\
                .assign(date=pd.to_datetime(data['datetime'].apply(lambda x: x[0:10])))\
                .assign(code=str(code))\
                .assign(date_stamp=data['datetime'].apply(lambda x: QA_util_date_stamp(str(x)[0:10])))\
                .set_index('date', drop=False, inplace=False)\
                .drop(['year', 'month', 'day', 'hour',
                       'minute', 'datetime'], axis=1)

            bfq_data['if_trade'] = True
            data = pd.concat(
                [bfq_data, info[['category']][bfq_data.index[0]:end_date]],
                axis=1)

            data['date'] = data.index
            data['if_trade'].fillna(value=False, inplace=True)
            data = data.fillna(method='ffill')
            data = pd.concat([
                data, info[['fenhong', 'peigu', 'peigujia', 'songzhuangu'
                            ]][bfq_data.index[0]:end_date]
            ],
                             axis=1)
            data = data.fillna(0)

            data['preclose'] = (data['close'].shift(1) * 10 - data['fenhong'] +
                                data['peigu'] * data['peigujia']) / (
                                    10 + data['peigu'] + data['songzhuangu'])
            data['adj'] = (data['preclose'].shift(-1) /
                           data['close']).fillna(1)[::-1].cumprod()
            data['open'] = data['open'] * data['adj']
            data['high'] = data['high'] * data['adj']
            data['low'] = data['low'] * data['adj']
            data['close'] = data['close'] * data['adj']
            data['preclose'] = data['preclose'] * data['adj']

            data = data[data['if_trade']]
            return data.drop(
                [
                    'fenhong', 'peigu', 'peigujia', 'songzhuangu', 'if_trade',
                    'category'
                ],
                axis=1)[data['open'] != 0].assign(date=data['date'].apply(
                    lambda x: str(x)[0:10]))[start_date:end_date]

        elif if_fq in ['02', 'hfq']:
            xdxr_data = QA_fetch_get_stock_xdxr(code)

            info = xdxr_data[xdxr_data['category'] == 1]

            bfq_data = data\
                .assign(date=pd.to_datetime(data['datetime'].apply(lambda x: x[0:10])))\
                .assign(code=str(code))\
                .assign(date_stamp=data['datetime'].apply(lambda x: QA_util_date_stamp(str(x)[0:10])))\
                .set_index('date', drop=False, inplace=False)\
                .drop(['year', 'month', 'day', 'hour',
                       'minute', 'datetime'], axis=1)

            bfq_data['if_trade'] = True
            data = pd.concat(
                [bfq_data, info[['category']][bfq_data.index[0]:]], axis=1)

            data['date'] = data.index
            data['if_trade'].fillna(value=False, inplace=True)
            data = data.fillna(method='ffill')
            data = pd.concat([
                data, info[['fenhong', 'peigu', 'peigujia', 'songzhuangu'
                            ]][bfq_data.index[0]:]
            ],
                             axis=1)
            data = data.fillna(0)

            data['preclose'] = (data['close'].shift(1) * 10 - data['fenhong'] +
                                data['peigu'] * data['peigujia']) / (
                                    10 + data['peigu'] + data['songzhuangu'])
            data['adj'] = (data['preclose'].shift(-1) /
                           data['close']).fillna(1).cumprod()
            data['open'] = data['open'] / data['adj']
            data['high'] = data['high'] / data['adj']
            data['low'] = data['low'] / data['adj']
            data['close'] = data['close'] / data['adj']
            data['preclose'] = data['preclose'] / data['adj']
            data = data[data['if_trade']]
            return data.drop(
                [
                    'fenhong', 'peigu', 'peigujia', 'songzhuangu', 'if_trade',
                    'category'
                ],
                axis=1)[data['open'] != 0].assign(date=data['date'].apply(
                    lambda x: str(x)[0:10]))[start_date:end_date]

        elif if_fq in ['04', 'ddhfq']:
            xdxr_data = QA_fetch_get_stock_xdxr(code)

            info = xdxr_data[xdxr_data['category'] == 1]

            bfq_data = data\
                .assign(date=pd.to_datetime(data['datetime'].apply(lambda x: x[0:10])))\
                .assign(code=str(code))\
                .assign(date_stamp=data['datetime'].apply(lambda x: QA_util_date_stamp(str(x)[0:10])))\
                .set_index('date', drop=False, inplace=False)\
                .drop(['year', 'month', 'day', 'hour',
                       'minute', 'datetime'], axis=1)

            bfq_data['if_trade'] = True
            data = pd.concat(
                [bfq_data, info[['category']][bfq_data.index[0]:end_date]],
                axis=1)

            data['date'] = data.index
            data['if_trade'].fillna(value=False, inplace=True)
            data = data.fillna(method='ffill')
            data = pd.concat([
                data, info[['fenhong', 'peigu', 'peigujia', 'songzhuangu'
                            ]][bfq_data.index[0]:end_date]
            ],
                             axis=1)
            data = data.fillna(0)

            data['preclose'] = (data['close'].shift(1) * 10 - data['fenhong'] +
                                data['peigu'] * data['peigujia']) / (
                                    10 + data['peigu'] + data['songzhuangu'])
            data['adj'] = (data['preclose'].shift(-1) /
                           data['close']).fillna(1).cumprod()
            data['open'] = data['open'] / data['adj']
            data['high'] = data['high'] / data['adj']
            data['low'] = data['low'] / data['adj']
            data['close'] = data['close'] / data['adj']
            data['preclose'] = data['preclose'] / data['adj']
            data = data[data['if_trade']]
            return data.drop(
                [
                    'fenhong', 'peigu', 'peigujia', 'songzhuangu', 'if_trade',
                    'category'
                ],
                axis=1)[data['open'] != 0].assign(date=data['date'].apply(
                    lambda x: str(x)[0:10]))[start_date:end_date]
Example #8
0
def QA_fetch_get_stock_day(code,
                           start_date,
                           end_date,
                           if_fq='00',
                           frequence='day',
                           ip=best_ip['stock'],
                           port=7709):
    api = TdxHq_API()
    with api.connect(ip, port):

        if frequence in ['day', 'd', 'D', 'DAY', 'Day']:
            frequence = 9
        elif frequence in ['w', 'W', 'Week', 'week']:
            frequence = 5
        elif frequence in ['month', 'M', 'm', 'Month']:
            frequence = 6
        elif frequence in ['Q', 'Quarter', 'q']:
            frequence = 10
        elif frequence in ['y', 'Y', 'year', 'Year']:
            frequence = 11
        start_date = str(start_date)[0:10]
        today_ = datetime.date.today()
        lens = QA_util_get_trade_gap(start_date, today_)

        data = pd.concat([
            api.to_df(
                api.get_security_bars(frequence, _select_market_code(code),
                                      code, (int(lens / 800) - i) * 800, 800))
            for i in range(int(lens / 800) + 1)
        ],
                         axis=0)

        data = data[data['open'] != 0]

        if if_fq in ['00', 'bfq']:
            data = data.assign(date=data['datetime'].apply(lambda x: str(x[0:10]))).assign(code=str(code))\
                .assign(date_stamp=data['datetime'].apply(lambda x: QA_util_date_stamp(str(x)[0:10]))).set_index('date', drop=False, inplace=False)

            return data.drop(
                ['year', 'month', 'day', 'hour', 'minute', 'datetime'],
                axis=1)[start_date:end_date].assign(
                    date=data['date'].apply(lambda x: str(x)[0:10]))

        elif if_fq in ['01', 'qfq']:

            xdxr_data = QA_fetch_get_stock_xdxr(code)
            bfq_data = data.assign(date=pd.to_datetime(data['datetime'].apply(lambda x: str(x[0:10])))).assign(code=str(code))\
                .assign(date_stamp=data['datetime'].apply(lambda x: QA_util_date_stamp(str(x)[0:10]))).set_index('date', drop=False, inplace=False)
            bfq_data = bfq_data.drop(
                ['year', 'month', 'day', 'hour', 'minute', 'datetime'], axis=1)
            #
            if xdxr_data is not None:
                info = xdxr_data[xdxr_data['category'] == 1]
                bfq_data['if_trade'] = True
                data = pd.concat(
                    [bfq_data, info[['category']][bfq_data.index[0]:]], axis=1)

                data['date'] = data.index
                data['if_trade'].fillna(value=False, inplace=True)
                data = data.fillna(method='ffill')
                data = pd.concat([
                    data, info[['fenhong', 'peigu', 'peigujia', 'songzhuangu'
                                ]][bfq_data.index[0]:]
                ],
                                 axis=1)
                data = data.fillna(0)

                data['preclose'] = (
                    data['close'].shift(1) * 10 - data['fenhong'] +
                    data['peigu'] * data['peigujia']) / (10 + data['peigu'] +
                                                         data['songzhuangu'])
                data['adj'] = (data['preclose'].shift(-1) /
                               data['close']).fillna(1)[::-1].cumprod()
                data['open'] = data['open'] * data['adj']
                data['high'] = data['high'] * data['adj']
                data['low'] = data['low'] * data['adj']
                data['close'] = data['close'] * data['adj']
                data['preclose'] = data['preclose'] * data['adj']

                data = data[data['if_trade']]
                return data.drop(
                    [
                        'fenhong', 'peigu', 'peigujia', 'songzhuangu',
                        'if_trade', 'category'
                    ],
                    axis=1)[data['open'] != 0].assign(date=data['date'].apply(
                        lambda x: str(x)[0:10]))[start_date:end_date]
            else:

                bfq_data['preclose'] = bfq_data['close'].shift(1)
                bfq_data['adj'] = 1
                return bfq_data[start_date:end_date]
        elif if_fq in ['03', 'ddqfq']:
            xdxr_data = QA_fetch_get_stock_xdxr(code)

            info = xdxr_data[xdxr_data['category'] == 1]

            bfq_data = data\
                .assign(date=pd.to_datetime(data['datetime'].apply(lambda x: x[0:10])))\
                .assign(code=str(code))\
                .assign(date_stamp=data['datetime'].apply(lambda x: QA_util_date_stamp(str(x)[0:10])))\
                .set_index('date', drop=False, inplace=False)\
                .drop(['year', 'month', 'day', 'hour',
                       'minute', 'datetime'], axis=1)

            bfq_data['if_trade'] = True
            data = pd.concat(
                [bfq_data, info[['category']][bfq_data.index[0]:end_date]],
                axis=1)

            data['date'] = data.index
            data['if_trade'].fillna(value=False, inplace=True)
            data = data.fillna(method='ffill')
            data = pd.concat([
                data, info[['fenhong', 'peigu', 'peigujia', 'songzhuangu'
                            ]][bfq_data.index[0]:end_date]
            ],
                             axis=1)
            data = data.fillna(0)

            data['preclose'] = (data['close'].shift(1) * 10 - data['fenhong'] +
                                data['peigu'] * data['peigujia']) / (
                                    10 + data['peigu'] + data['songzhuangu'])
            data['adj'] = (data['preclose'].shift(-1) /
                           data['close']).fillna(1)[::-1].cumprod()
            data['open'] = data['open'] * data['adj']
            data['high'] = data['high'] * data['adj']
            data['low'] = data['low'] * data['adj']
            data['close'] = data['close'] * data['adj']
            data['preclose'] = data['preclose'] * data['adj']

            data = data[data['if_trade']]
            return data.drop(
                [
                    'fenhong', 'peigu', 'peigujia', 'songzhuangu', 'if_trade',
                    'category'
                ],
                axis=1)[data['open'] != 0].assign(date=data['date'].apply(
                    lambda x: str(x)[0:10]))[start_date:end_date]

        elif if_fq in ['02', 'hfq']:
            xdxr_data = QA_fetch_get_stock_xdxr(code)

            info = xdxr_data[xdxr_data['category'] == 1]

            bfq_data = data\
                .assign(date=pd.to_datetime(data['datetime'].apply(lambda x: x[0:10])))\
                .assign(code=str(code))\
                .assign(date_stamp=data['datetime'].apply(lambda x: QA_util_date_stamp(str(x)[0:10])))\
                .set_index('date', drop=False, inplace=False)\
                .drop(['year', 'month', 'day', 'hour',
                       'minute', 'datetime'], axis=1)

            bfq_data['if_trade'] = True
            data = pd.concat(
                [bfq_data, info[['category']][bfq_data.index[0]:]], axis=1)

            data['date'] = data.index
            data['if_trade'].fillna(value=False, inplace=True)
            data = data.fillna(method='ffill')
            data = pd.concat([
                data, info[['fenhong', 'peigu', 'peigujia', 'songzhuangu'
                            ]][bfq_data.index[0]:]
            ],
                             axis=1)
            data = data.fillna(0)

            data['preclose'] = (data['close'].shift(1) * 10 - data['fenhong'] +
                                data['peigu'] * data['peigujia']) / (
                                    10 + data['peigu'] + data['songzhuangu'])
            data['adj'] = (data['preclose'].shift(-1) /
                           data['close']).fillna(1).cumprod()
            data['open'] = data['open'] / data['adj']
            data['high'] = data['high'] / data['adj']
            data['low'] = data['low'] / data['adj']
            data['close'] = data['close'] / data['adj']
            data['preclose'] = data['preclose'] / data['adj']
            data = data[data['if_trade']]
            return data.drop(
                [
                    'fenhong', 'peigu', 'peigujia', 'songzhuangu', 'if_trade',
                    'category'
                ],
                axis=1)[data['open'] != 0].assign(date=data['date'].apply(
                    lambda x: str(x)[0:10]))[start_date:end_date]

        elif if_fq in ['04', 'ddhfq']:
            xdxr_data = QA_fetch_get_stock_xdxr(code)

            info = xdxr_data[xdxr_data['category'] == 1]

            bfq_data = data\
                .assign(date=pd.to_datetime(data['datetime'].apply(lambda x: x[0:10])))\
                .assign(code=str(code))\
                .assign(date_stamp=data['datetime'].apply(lambda x: QA_util_date_stamp(str(x)[0:10])))\
                .set_index('date', drop=False, inplace=False)\
                .drop(['year', 'month', 'day', 'hour',
                       'minute', 'datetime'], axis=1)

            bfq_data['if_trade'] = True
            data = pd.concat(
                [bfq_data, info[['category']][bfq_data.index[0]:end_date]],
                axis=1)

            data['date'] = data.index
            data['if_trade'].fillna(value=False, inplace=True)
            data = data.fillna(method='ffill')
            data = pd.concat([
                data, info[['fenhong', 'peigu', 'peigujia', 'songzhuangu'
                            ]][bfq_data.index[0]:end_date]
            ],
                             axis=1)
            data = data.fillna(0)

            data['preclose'] = (data['close'].shift(1) * 10 - data['fenhong'] +
                                data['peigu'] * data['peigujia']) / (
                                    10 + data['peigu'] + data['songzhuangu'])
            data['adj'] = (data['preclose'].shift(-1) /
                           data['close']).fillna(1).cumprod()
            data['open'] = data['open'] / data['adj']
            data['high'] = data['high'] / data['adj']
            data['low'] = data['low'] / data['adj']
            data['close'] = data['close'] / data['adj']
            data['preclose'] = data['preclose'] / data['adj']
            data = data[data['if_trade']]
            return data.drop(
                [
                    'fenhong', 'peigu', 'peigujia', 'songzhuangu', 'if_trade',
                    'category'
                ],
                axis=1)[data['open'] != 0].assign(date=data['date'].apply(
                    lambda x: str(x)[0:10]))[start_date:end_date]