Example n. 1
def fetch_peindex(**kwargs):
    """
    Fetch PE index records, sorted by index id (asc) and date (desc).

    Args:
        **kwargs:
            statistic_date: query cut-off date
            iids: index ids to fetch
            freq: data frequency
            use_tmstmp: if True, convert `statistic_date` to POSIX timestamps
            conn: DB connection

    Returns:
        pandas.DataFrame
    """
    statistic_date, iids, freq, conn = meta.get_kwargs_used(
        ("statistic_date", "iids", "freq", "conn"), **kwargs)

    frame = pd.read_sql(SQL_USED.pe_index(statistic_date, iids, freq), conn)

    if kwargs.get("use_tmstmp") is True:
        # dates -> POSIX timestamps (day precision)
        frame["statistic_date"] = frame["statistic_date"].apply(
            lambda d: dt.datetime(d.year, d.month, d.day).timestamp())

    frame.sort_values(by=["index_id", "statistic_date"],
                      ascending=[True, False],
                      inplace=True)
    frame.index = range(len(frame))

    return frame
Example n. 2
def fetch_found_date(**kwargs):
    """
    Fetch foundation dates for funds or organizations.

    Args:
        **kwargs:
            iids: ids to look up
            id_type: "fund" or "org" — selects the id column of the result
            use_tmstmp: if True, convert dates to POSIX timestamps
            conn: DB connection

    Returns:
        dict: {id: foundation date (or timestamp)}

    Raises:
        ValueError: if `id_type` is neither "fund" nor "org".
    """
    kw_used = ("iids", "id_type", "conn")
    iids, id_type, conn = meta.get_kwargs_used(kw_used, **kwargs)

    # Fail fast on a bad id_type; the original code left `k1` unbound here
    # and crashed later with a NameError.
    if id_type == "fund":
        k1 = "fund_id"
    elif id_type == "org":
        k1 = "org_id"
    else:
        raise ValueError(
            "id_type must be 'fund' or 'org', got {!r}".format(id_type))

    sql_foundation_date = SQL_USED.foundation_date(ids=iids, id_type=id_type)
    d_foundation_date = pd.read_sql(sql_foundation_date, conn)

    if kwargs.get("use_tmstmp") is True:
        d_foundation_date["t_min"] = d_foundation_date["t_min"].apply(
            lambda x: dt.datetime(x.year, x.month, x.day).timestamp())

    result = dict(zip(d_foundation_date[k1], d_foundation_date["t_min"]))

    return result
Example n. 3
def fetch_fundname(**kwargs):
    """Return a {fund_id: fund_name} mapping for the requested funds.

    Args:
        **kwargs:
            iids: fund ids to look up
            level: name level passed through to the SQL builder
            conn: DB connection

    Returns:
        dict
    """
    iids, level, conn = meta.get_kwargs_used(("iids", "level", "conn"),
                                             **kwargs)
    frame = pd.read_sql(SQL_USED.fund_name(iids, level=level), con=conn)
    return dict(zip(frame["fund_id"], frame["fund_name"]))
Example n. 4
def fetch_marketindex(**kwargs):
    """
    Fetch market benchmark series and reshape them to long format.

    Args:
        **kwargs:
            statistic_date: query cut-off date
            iids: benchmark id(s); a single str is wrapped into a list
            transform: optional {column: callable} applied element-wise
            use_tmstmp: if True, convert `statistic_date` to POSIX timestamps
            conn: DB connection

    Returns:
        pandas.DataFrame with columns ["index_id", "index_value",
        "statistic_date"], sorted by index_id (asc) / statistic_date (desc)
    """
    kw_used = ("statistic_date", "iids", "transform", "conn")
    # NOTE: renamed local from `apply` (shadowed the builtin) to `transform`,
    # matching the kwarg name it is read from.
    statistic_date, iids, transform, conn = meta.get_kwargs_used(
        kw_used, **kwargs)

    if isinstance(iids, str):
        iids = [iids]

    sql_bms = SQL_USED.market_index(statistic_date, iids, whole=True)
    df_bms = pd.read_sql(sql_bms, conn)
    if "y1_treasury_rate" in iids:
        # Treasury rate is published sparsely; back-fill the gaps.
        # `.bfill()` replaces the deprecated `fillna(method="backfill")`.
        df_bms.loc[:, "y1_treasury_rate"] = df_bms["y1_treasury_rate"].bfill()

    if transform is not None:
        for col, func in transform.items():
            df_bms[col] = df_bms[col].apply(func)

    # Melt the wide frame (one column per benchmark, last column is
    # statistic_date) into long format. pd.concat replaces the removed
    # DataFrame.append (pandas >= 2.0).
    parts = []
    for col in df_bms.columns[:-1]:
        tmp = df_bms.loc[:, [col, "statistic_date"]]
        tmp.loc[:, "index_id"] = col
        tmp.columns = ["index_value", "statistic_date", "index_id"]
        parts.append(tmp.dropna())
    if parts:
        result = pd.concat(parts)
    else:
        # Empty query result: keep the documented column layout instead of
        # raising KeyError on the column selection below.
        result = pd.DataFrame(
            columns=["index_value", "statistic_date", "index_id"])
    result = result[["index_id", "index_value", "statistic_date"]]

    if kwargs.get("use_tmstmp") is True:
        result.loc[:, "statistic_date"] = result["statistic_date"].apply(
            lambda x: dt.datetime(x.year, x.month, x.day).timestamp())

    result.sort_values(by=["index_id", "statistic_date"],
                       ascending=[True, False],
                       inplace=True)
    result.index = range(len(result))

    return result
Example n. 5
def fetch_firstdate_used(**kwargs):
    """
    Fetch each id's foundation date, sanity-check it against `check_date`,
    and fall back to the first net-value date where the foundation date is
    missing or implausible (later than `check_date`).

    Args:
        **kwargs:
            iids: ids to look up
            id_type: "fund" or "org" (fallback only runs for "fund")
            use_tmstmp: passed through to the underlying fetchers
            check_date: dates after this are treated as erroneous
            conn: DB connection

    Returns:
        dict: {id: first date used}
    """
    kw_used = ("iids", "id_type", "use_tmstmp", "check_date", "conn")
    iids, id_type, use_tmstmp, check_date, conn = meta.get_kwargs_used(
        kw_used, **kwargs)
    # normalize check_date to a comparable timestamp
    check_date = tu.universal_time(check_date)[1]
    error_list = []
    first_date = fetch_found_date(iids=iids,
                                  id_type=id_type,
                                  use_tmstmp=use_tmstmp,
                                  conn=conn)

    # a foundation date later than check_date is considered wrong
    for iid, date in first_date.items():
        if date > check_date:
            error_list.append(iid)

    if len(error_list) > 0:
        print(
            "{num_error} of {num_total} funds ({err_funds}) foundation date may be wrong, using first nv date instead..."
            .format(num_error=len(error_list),
                    num_total=len(iids),
                    err_funds=error_list))

    # if len(first_date) != len(iids):
    if (len(first_date) - len(error_list)
        ) != len(iids):  # ids needing a fallback date are the union of: 1) missing foundation date, 2) erroneous first nv date
        if id_type == "fund":
            print(
                "{num_missing} of {num_total} funds' foundation date missing, using first nv date instead..."
                .format(num_missing=len(set(iids) - first_date.keys()),
                        num_total=len(iids)))
            firstnv_date = fetch_firstnv_date(
                iids=(set(iids) - first_date.keys() | set(error_list)),
                use_tmstmp=use_tmstmp,
                conn=conn)
            first_date.update(firstnv_date)

    return first_date
Example n. 6
def fetch_fids_used(**kwargs):
    """Return the ids updated as of `statistic_date` for the given frequency.

    Args:
        **kwargs:
            statistic_date: query cut-off date
            freq: data frequency
            conn: DB connection

    Returns:
        list
    """
    statistic_date, freq, conn = meta.get_kwargs_used(
        ("statistic_date", "freq", "conn"), **kwargs)
    query = SQL_USED.ids_updated_sd(statistic_date, freq)
    return [row[0] for row in conn.execute(query)]
Example n. 7
def gen_stdseries(**kwargs):
    """
        Generate standard time series of various intervals for aligning.

    Args:
        **kwargs:
            date_s: start (statistic) date of the series
            intervals: iterable of intervals; "whole" is skipped here
            freq: frequency key into `_periods_of_freq`
            extend: None, or dict {interval: extra sample count}
            weekday: optional, passed through to tu.timeseries_std

    Returns:
        dict: {interval: standard time series}
    """
    kw_used = ("date_s", "intervals", "freq", "extend")
    date_s, intervals, freq, extend = meta.get_kwargs_used(kw_used, **kwargs)

    # NOTE(review): the two branches compute `extend` with deliberately
    # different formulas (inverted freq/interval condition); confirm against
    # tu.timeseries_std before unifying them.
    if extend is None:
        tm_series_std = {
            interval: tu.timeseries_std(
                date_s,
                interval,
                _periods_of_freq[freq],
                # extend=1 * int(freq == "d" and interval != "m"),
                extend=1 + 1 * int(not (freq == "d" and interval == "m")),
                use_lastday=True,  # daily freq: month-to-date interval takes one extra point
                keep_firstday=True,
                weekday=kwargs.get("weekday", False))
            for interval in intervals if interval != "whole"
        }
    else:
        tm_series_std = {
            interval: tu.timeseries_std(
                date_s,
                interval,
                _periods_of_freq[freq],
                extend=1 + extend.get(interval, 0) +
                1 * int(freq == "d" and interval !=
                        "m"),  # "year-to-date" and funds older than one year need an extra month of data for the algorithm's out-of-interval search
                use_lastday=True,
                keep_firstday=True,
                weekday=kwargs.get("weekday", False))
            for interval in intervals if interval != "whole"
        }
    return tm_series_std
Example n. 8
def gen_stdseries_longest(**kwargs):
    """
    Generate, per id, the longest standard time series spanning start to end.

    Args:
        **kwargs:
            iids: collections.Iterable or dict{id: attributes}
            key_tm: key of the time series inside each id's attributes
            date_s: datetime.date/datetime/float, dict{id: date}, or None
                (None derives the start per id as max of its time series)
            date_e: datetime.date/datetime/float, dict{id: date}, or None
                (None derives the end per id as min of its time series)
            freq: str, frequency key into `_periods_of_freq`

    Returns:
        dict: {id: standard time series}
    """
    iids, key_tm, date_s, date_e, freq = meta.get_kwargs_used(
        ("iids", "key_tm", "date_s", "date_e", "freq"), **kwargs)

    scalar_types = (dt.date, dt.datetime, float)

    # Normalize both endpoints to {id: date} mappings.
    if isinstance(date_s, scalar_types):
        date_s = dict.fromkeys(iids, date_s)
    elif date_s is None:
        date_s = {iid: max(attrs[key_tm]) for iid, attrs in iids.items()}

    if isinstance(date_e, scalar_types):
        date_e = dict.fromkeys(iids, date_e)
    elif date_e is None:
        date_e = {iid: min(attrs[key_tm]) for iid, attrs in iids.items()}

    weekday = kwargs.get("weekday", False)
    periods = _periods_of_freq.get(freq)
    return {
        iid: tu.timeseries_std_se(date_s.get(iid),
                                  date_e.get(iid),
                                  periods_y=periods,
                                  weekday=weekday)
        for iid in iids
    }
Example n. 9
def fetch_fundnv(**kwargs):
    """
    Fetch fund net values, sorted by fund id (asc) and date (desc).

    Args:
        **kwargs:
            iids: fund ids to fetch
            processes: optional; if present, split `iids` into this many
                chunks and read them concurrently on a thread pool
            use_tmstmp: if True, convert `statistic_date` to POSIX timestamps
            conn: DB connection

    Returns:
        pandas.DataFrame
    """
    iids, conn = meta.get_kwargs_used(("iids", "conn"), **kwargs)

    if "processes" in kwargs:
        # Chunk count comes from kwargs; pool size is the configured
        # thread-pool size (as in the original code) — TODO confirm this
        # asymmetry is intended.
        tasks = [
            SQL_USED.nav(chunk)
            for chunk in _get_chunk(iids, kwargs.get("processes"))
        ]
        read = functools.partial(pd.read_sql, con=conn)
        # Context manager guarantees the pool is terminated even on error;
        # the original leaked the pool if a query raised.
        with ThreadPool(processes=_poolsize.get("thread")) as pool:
            frames = pool.map(read, tasks)
        # pd.concat replaces the removed DataFrame.append (pandas >= 2.0).
        result = pd.concat(frames) if frames else pd.DataFrame()
    else:
        result = pd.read_sql(SQL_USED.nav(iids), con=conn)

    if kwargs.get("use_tmstmp") is True:
        result["statistic_date"] = result["statistic_date"].apply(
            lambda x: dt.datetime(x.year, x.month, x.day).timestamp())

    result.sort_values(by=["fund_id", "statistic_date"],
                       ascending=[True, False],
                       inplace=True)
    result.index = range(len(result))

    return result
Example n. 10
def fetch_fundtype(**kwargs):
    """
    Return a {fund_id: type code} mapping for the requested dimension/level.

    Args:
        **kwargs:
            iids: fund ids to look up
            dimension: {"strategy", "structure", "target", "issuance"}
            level: type level, e.g. 1
            conn: DB connection

    Returns:
        dict (empty when no query can be built for the dimension/level)
    """
    iids, dimension, level, conn = meta.get_kwargs_used(
        ("iids", "dimension", "level", "conn"), **kwargs)
    query = SQL_USED.fund_type(iids, dimension, level)
    if query is None:
        return {}
    frame = pd.read_sql(query, con=conn)
    return dict(zip(frame["fund_id"], frame["code"]))
Example n. 11
def fetch_firstnv_date(**kwargs):
    """
    Return a {fund_id: first net-value date} mapping.

    Args:
        **kwargs:
            iids: fund ids to look up
            use_tmstmp: if True, convert dates to POSIX timestamps
            conn: DB connection

    Returns:
        dict
    """
    iids, conn = meta.get_kwargs_used(("iids", "conn"), **kwargs)

    frame = pd.read_sql(SQL_USED.firstnv_date(iids), con=conn)

    if kwargs.get("use_tmstmp") is True:
        # dates -> POSIX timestamps (day precision)
        frame["t_min"] = frame["t_min"].apply(
            lambda d: dt.datetime(d.year, d.month, d.day).timestamp())

    return dict(zip(frame["fund_id"], frame["t_min"]))
Example n. 12
def match_by_std(obj, **kwargs):
    """
        Match objects with time series to standard time series, and apply the strategy to its other attributes.

    Args:
        obj: dict
            Dict like {id: {key1: Iterable, key2: Iterable, ...}};

        **kwargs:
            key_tm: str
                Key of the time series;
            key_used: Iterable<str>
                keys to match to standard time series;
            date_s: datetime.date, datetime.datetime, or float
                Statistic date(or the start date) of the standard time series;
            date_e: datetime.date, datetime.datetime, float, or dict
                Earliest date(or the end date) of the standard time series. If a dict is passed, then it should be
                {id: date}_like and its ids should be the same as the `obj` length;
            intervals: Iterable
                Interval of the standard time series to match, optional {1, 3, 6, 12, 24, 36, 60, "w", "m", "q", "a",
                "whole"};
            freq: str
                Frequency of the standard time series, optional {"w", "m"};
            extend: int, or dict
                Extra sample number of `interval` to use. If an int is parsed, then all intervals in `interval` will use
                this int as the extra sample number, else if an dict like {interval: extra_num} is parsed, then the
                specified interval will use the given extra number. Default None;
            shift: dict
                Dict like {id: shift_num} to specified ids which need to be shifted on its match case;
            apply: dict
                Dict like {id: func}
    Returns:
        dict like {id: {key_used:{interval: Iterable}}}
    """
    kw_used = ("key_tm", "key_used", "date_s", "date_e", "intervals", "freq",
               "shift", "extend", "apply")
    # NOTE(review): local name `apply` shadows the builtin; consider renaming.
    key_tm, key_used, date_s, date_e, intervals, freq, shift, extend, apply = meta.get_kwargs_used(
        kw_used, **kwargs)

    # Normalize date_e to {id: unix_timestamp}; None derives each id's end
    # date from the minimum of its own time series.
    if isinstance(date_e, (dt.date, dt.datetime, float)):
        date_e = dict.fromkeys(obj.keys(), tu.universal_time(date_e)[1])
    elif date_e is None:
        date_e = {
            iid: tu.universal_time(min(attributes[key_tm]))[1]
            for iid, attributes in obj.items()
        }

    # Per-id longest standard series (for the "whole" interval) ...
    tm_series_std_alls = gen_stdseries_longest(iids=obj,
                                               key_tm=key_tm,
                                               date_s=date_s,
                                               date_e=date_e,
                                               freq=freq,
                                               weekday=kwargs.get(
                                                   "weekday", False))
    # ... and the shared fixed-interval standard series.
    tm_series_std = gen_stdseries(date_s=date_s,
                                  freq=freq,
                                  intervals=intervals,
                                  extend=extend,
                                  weekday=kwargs.get("weekday", False))

    # Expected sample count per interval, used to trim/pad below.
    sample_nums = {
        interval: len(tm_serie_std)
        for interval, tm_serie_std in tm_series_std.items()
    }

    # Index-match each object's time series against the standard series.
    if freq == "w" or freq == "d":
        matchs_whole = {
            iid: tu.outer_match4indicator_w(attributes[key_tm],
                                            tm_series_std_alls[iid], False)[1]
            for iid, attributes in obj.items()
        }
        matchs_w = {
            iid: tu.outer_match4indicator_w(attributes[key_tm],
                                            tm_series_std["w"])[1]
            for iid, attributes in obj.items()
        }

    elif freq == "m":
        matchs_whole = {
            iid: tu.outer_match4indicator_m(attributes[key_tm],
                                            tm_series_std_alls[iid], False)[1]
            for iid, attributes in obj.items()
        }

    # Shared {} placeholder is safe: every entry is replaced per-iid below.
    result = dict.fromkeys(obj.keys(), {})
    if shift is None:
        shift = {}

    # match for each object
    date_s_dt = dt.date.fromtimestamp(tu.universal_time(date_s)[1])
    # intervals_regular = [interval for interval in intervals if interval not in ("w", "a", "whole")]   #
    intervals_regular = [
        interval for interval in intervals if interval not in ("w", "whole")
    ]
    for iid, attributes in obj.items():
        shift_iid = shift.get(iid, 0)
        result[iid] = dict.fromkeys(key_used, {})
        date_e_iid = dt.date.fromtimestamp(date_e[iid])
        for key in result[iid].keys():
            freq_of_key = {}

            # Project the attribute through the matched indices; unmatched
            # slots become None.
            freq_of_key["whole"] = [
                attributes[key][idx] if idx is not None else None
                for idx in matchs_whole[iid].values()
            ]

            if apply is not None and iid in apply:
                freq_of_key["whole"] = apply[iid](freq_of_key["whole"])

            if shift_iid > 0:
                freq_of_key["whole"] = freq_of_key["whole"][shift_iid:]

            # based on each product's foundation/end dates, decide which intervals can be computed
            interval_used = _check_intervals(date_s_dt, date_e_iid, intervals)

            length_max = len(freq_of_key["whole"])
            for interval in intervals_regular:
                if interval_used[interval]:
                    sp_num = sample_nums[interval] - shift_iid - 1  #
                    # sp_num = sample_nums[interval] - 1
                    # Slice (copies) the matched series to the interval's
                    # sample count, padding with None if it is too short.
                    freq_of_key[interval] = freq_of_key["whole"][:sp_num]
                    if sp_num > length_max:
                        freq_of_key[interval].extend([None] *
                                                     (sp_num - length_max))
                else:
                    freq_of_key[interval] = None

            # freq_of_key["a"] = [attributes[key][idx] if idx is not None else None for idx in matchs_a[iid].values()]  #

            if freq == "w" or freq == "d":
                freq_of_key["w"] = [
                    attributes[key][idx] if idx is not None else None
                    for idx in matchs_w[iid].values()
                ]

            result[iid][key] = freq_of_key

    return result