Esempio n. 1
def slice_data(dataframe, slice_by, cols_used):
    """Slice selected columns of ``dataframe`` into per-group mappings.

    For every column in ``cols_used`` the rows are split by the distinct
    values of ``slice_by`` (via ``su.idx4slice`` / ``su.slice``) and stored
    as ``{column: {group_value: sliced_values}}``. That nested mapping is
    then passed through ``_reverse_dict`` before being returned.

    Args:
        dataframe: Source DataFrame, or ``None``.
        slice_by: Name of the column whose distinct values define the groups.
        cols_used: Iterable of column names to slice.

    Returns:
        The output of ``_reverse_dict`` applied to the per-column mapping,
        or ``None`` when ``dataframe`` is ``None``.

    Note:
        The earlier version overwrote the computed result with ``None`` on
        the non-empty path and left ``result`` unbound when ``dataframe``
        was ``None``; the ``None`` assignment belongs to the empty-input
        branch only.
    """
    if dataframe is None:
        return None

    df_tmp = dataframe.copy()

    # Distinct group keys in first-appearance order; they are paired
    # positionally with the slices below.
    # NOTE(review): assumes su.slice yields one slice per distinct key in
    # this same order -- confirm against su.
    bys = df_tmp[slice_by].drop_duplicates().tolist()
    df_tmp.index = df_tmp[slice_by]

    idx4slice = su.idx4slice(dataframe, slice_by)
    result = {}
    for col in cols_used:
        tmp = su.slice(df_tmp, idx4slice, col)
        result[col] = dict(zip(bys, tmp))

    return _reverse_dict(result)
Esempio n. 2
def is_daily(ids_to_update):
    """Label funds whose net-value history looks daily.

    Reads ``fund_id`` / ``statistic_date`` rows for the given funds from
    ``fund_nv_data_standard``, and for each fund with more than one
    observation computes ``(observations - 1) / (days between newest and
    oldest observation)``. Funds whose ratio is at least 0.5 are labelled
    ``"日度"``.

    Args:
        ids_to_update: Fund ids; formatted into the SQL ``IN`` clause by
            ``sf.SQL.ids4sql``.

    Returns:
        dict mapping ``fund_id`` to ``"日度"`` for the matching funds only.
    """
    sql_fnds = "SELECT fund_id, statistic_date FROM fund_nv_data_standard WHERE statistic_date >= '19700101' \
                AND fund_id IN {ids}".format(ids=sf.SQL.ids4sql(ids_to_update))
    su.tic("Fetching get_data......")
    fnds = pd.read_sql(sql_fnds, engine_rd)
    # Newest date first within each fund, so element 0 of a slice is the
    # latest observation and element -1 the earliest.
    fnds = fnds.dropna().sort_values(by=["fund_id", "statistic_date"],
                                     ascending=[True, False])
    fnds.index = range(len(fnds))
    fnds["statistic_date"] = fnds["statistic_date"].apply(
        lambda x: time.mktime(x.timetuple()))

    ids4slice = su.idx4slice(fnds, slice_by="fund_id")
    ids = fnds.drop_duplicates(subset=["fund_id"])["fund_id"].tolist()
    # NOTE(review): assumes su.slice returns one sequence per fund, in the
    # same order as ``ids`` -- confirm against su.
    t_reals = su.slice(fnds, ids4slice, "statistic_date")

    match_ratio = []
    for t_real in t_reals:
        ratio = None
        if len(t_real) > 1:
            span_days = (t_real[0] - t_real[-1]) / 86400
            # A zero span (all observations on the same date) would divide
            # by zero; treat it as "no ratio" instead of aborting the batch.
            if span_days > 0:
                ratio = (len(t_real) - 1) / span_days
        match_ratio.append(ratio)

    return {
        fund_id: "日度"
        for fund_id, ratio in zip(ids, match_ratio)
        if ratio is not None and ratio >= 0.5
    }
Esempio n. 3
def is_monthly(ids_to_update):
    """Label funds whose standardized monthly series is well populated.

    Reads rows from ``fund_nv_standard_m`` for the given funds (restricted
    by the query to funds having at least 3 rows there). For each fund the
    ratio ``non-None statistic_date entries / statistic_date_std entries``
    is computed; funds with a ratio of at least 0.5 are labelled ``"月度"``.

    Args:
        ids_to_update: Fund ids; formatted into the SQL ``IN`` clause by
            ``sf.SQL.ids4sql`` (same convention as ``is_daily``).

    Returns:
        dict mapping ``fund_id`` to ``"月度"`` for the matching funds only.
    """
    su.tic("Fetching get_data......")
    sql_std_m = "SELECT fund_id, statistic_date_std, statistic_date FROM fund_nv_standard_m \
                 WHERE fund_id IN {ids} \
                 AND fund_id IN (SELECT fund_id FROM (SELECT fund_id, COUNT(fund_id) cnt FROM fund_nv_standard_m \
                 GROUP BY fund_id HAVING cnt >= 3) T)".format(
        ids=sf.SQL.ids4sql(ids_to_update))
    d_std_m = pd.read_sql(sql_std_m, engine_rd)

    ids4slice = su.idx4slice(d_std_m, slice_by="fund_id")
    ids = d_std_m.drop_duplicates(subset=["fund_id"])["fund_id"].tolist()
    # NOTE(review): assumes su.slice returns one sequence per fund, in the
    # same order as ``ids`` -- confirm against su.
    t_reals = su.slice(d_std_m, ids4slice, "statistic_date")
    t_stds = su.slice(d_std_m, ids4slice, "statistic_date_std")

    # NOTE(review): only ``None`` counts as missing here; if missing dates
    # arrive as NaT/NaN they are counted as present -- verify the dtype.
    match_ratio = [
        len([x for x in t_real if x is not None]) / len(t_std)
        for t_real, t_std in zip(t_reals, t_stds)
    ]

    return {
        fund_id: "月度"
        for fund_id, ratio in zip(ids, match_ratio)
        if ratio >= 0.5
    }
Esempio n. 4
                # NOTE(review): this excerpt is the interior of a larger
                # function whose header is not visible; several call
                # arguments below are missing in the excerpt (e.g. the first
                # argument of tu.timeseries_std) and the last statement is cut.
                # Fall back to the first nv date and merge it with the
                # foundation dates already held in `fdate`.
                sql_fnvdate = sf.SQL.firstnv_date(ids_diff)
                fnvdate = pd.read_sql(sql_fnvdate, engine_rd)
                ids_diff = fnvdate["fund_id"].tolist()
                t_min = pd.merge(fdate, fnvdate, "outer")
                print("Some foundation_date missed, use first nv date...", len(ids_diff), len(ids_fdate), len(t_min))
                t_min = t_min.sort_values("fund_id", ascending=True)  # Sort the df by fund id ASC

            # NOTE(review): sql_nav is assigned but the query is rebuilt on the
            # next line rather than reusing it.
            sql_nav = sf.SQL.nav(ids_used)  # Get their navs
            d = pd.read_sql(sf.SQL.nav(ids_used), engine_rd)

            d["statistic_date"] = d["statistic_date"].apply(su.date2tstp)

            idx4slice = su.idx4slice(d, slice_by="fund_id")  # Grouping the datas By fund_id
            navs = su.slice(d, idx4slice, "nav")
            t_reals = su.slice(d, idx4slice, "statistic_date")
            t_mins = t_min["t_min"].tolist()
            t_mins_tstp = [su.date2tstp(x) for x in t_mins]

            print("length of Data: {0}".format(len(d)))
            t_stds = [tu.timeseries_std(, interval, periods_y=12, use_lastday=True, extend=1) for interval
                      in intervals]  # standard series   ###w->m

            # Element 6 of the per-interval standard series is kept under the
            # name t_std_y5.
            t_std_y5 = t_stds[6]
            t_stds_len = [len(x) - 1 for x in t_stds]  # number of nav samples in each standard series
            t_std_alls = [tu.timeseries_std(, tu.periods_in_interval(, t_min, 12), periods_y=12,
                                            use_lastday=True, extend=6) for t_min in t_mins]  # standard series, since inception
            # Keep the entries that are >= t_min plus one more entry.
            t_std_alls = [t_std_all[:len([x for x in t_std_all if x >= t_min]) + 1] for t_std_all, t_min in
Esempio n. 5
def calculate():
    # Build a DataFrame of per-organisation, per-benchmark indicator rows and
    # return it.
    #
    # Visible flow: load benchmark / pe index series, find the funds and
    # organisations to compute, build standard time series, derive benchmark
    # and risk-free return series, then loop over organisations computing
    # odds / persistence / timing / stock-selection indicators.
    #
    # NOTE(review): this excerpt is syntactically incomplete. Several closing
    # brackets, call arguments, an `if` condition/body and the body of the
    # final `for x in tmp:` loop are missing, so nothing visible ever appends
    # to `result`. Restore from the upstream source before relying on it.
    df_whole = pd.DataFrame()
    conn = engine_read.connect()

    year = process_date.year
    month = process_date.month

    # Second element of monthrange(); presumably the number of days in the
    # month if `cld` is the stdlib calendar module -- TODO confirm.
    month_range = cld.monthrange(year, month)[1]
    time_to_fill = sf.Time(dt.datetime(year, month, month_range))
    # year, month = time_to_fill.year, time_to_fill.month
    # month_range = time_to_fill.month_range

    sql_bm = sf.SQL.market_index(  # Get benchmark prices
    sql_pe = sf.SQL.pe_index(, freq="m")  ###w->m

    bm = pd.read_sql(sql_bm, engine_read)
    # NOTE(review): fillna(method=...) is deprecated in recent pandas releases
    # (bfill() is the replacement) -- check the pinned pandas version.
    bm["y1_treasury_rate"] = bm["y1_treasury_rate"].fillna(method="backfill")
    bm["y1_treasury_rate"] = bm["y1_treasury_rate"].apply(su.annually2monthly)
    bm["statistic_date"] = bm["statistic_date"].apply(su.date2tstp)
    pe = pd.read_sql(sql_pe, engine_read)
    pe["statistic_date"] = pe["statistic_date"].apply(su.date2tstp)

    # Benchmark price columns, addressed by position (0..4) further down.
    prices_bm = [
        bm["hs300"].tolist(), bm["csi500"].tolist(), bm["sse50"].tolist(),
        bm["cbi"].tolist(), bm["nfi"]
    price_pe = pe["index_value"].tolist()
    r_tbond = bm["y1_treasury_rate"].tolist()
    t_bm = bm["statistic_date"].tolist()
    t_pe = pe["statistic_date"].tolist()

    intervals = table.intervals
    # Position lists used below to pick entries out of `prices`, `rs_f` and
    # the benchmark return lists.
    intervals5 = [1, 2, 3, 4, 5, 6, 10, 11]
    intervals6 = [2, 3, 4, 5, 6, 10, 11]

    result = []

    # NOTE(review): `conn` is rebound to a new connection here; the one
    # opened at the top of the function is not closed in the visible code.
    conn = engine_read.connect()

    # Get Data
    date_s = sf.Time(process_date -
                     dt.timedelta(  # Generate statistic_date

    sql_fids_updated = sf.SQL.ids_updated_sd(, "om")
    ids_updated = tuple(
        for x in conn.execute(sql_fids_updated).fetchall())  # find the funds whose nav was updated this month

    sql_o_updated = "SELECT DISTINCT fom.org_id FROM fund_org_mapping fom \
             JOIN org_info oi ON fom.org_id = oi.org_id \
             WHERE org_type_code = 1 AND oi.found_date <= '{0}'  AND fund_id IN {1}".format( - relativedelta(months=3),
        ids_updated)  # from the funds with updated nav, determine the advisors (orgs) to compute
    o_updated = tuple(x[0] for x in conn.execute(sql_o_updated).fetchall())

    sql_fom = "SELECT fom.org_id, fom.fund_id, oi.found_date, oi.org_name FROM fund_org_mapping fom \
               JOIN org_info oi ON fom.org_id = oi.org_id \
               JOIN fund_info fi ON fom.fund_id = fi.fund_id \
               WHERE fom.org_id IN {0} AND fom.org_type_code = 1 AND oi.found_date <= '{1}' AND fi.foundation_date <= '{2}'".format(
        o_updated, - relativedelta(months=3), - relativedelta(months=1))
    fom = pd.read_sql(sql_fom, conn)  # for the advisors to compute, find all funds they manage

    fid_used = tuple(fom["fund_id"])
    sql_fnd = sf.SQL.nav(fid_used)
    fnd = pd.read_sql(sql_fnd, conn)
    fnd = fnd.dropna()
    fnd.index = range(len(fnd))

    # Join fund/org mapping with nav rows; newest date first within each fund.
    data = fom.merge(fnd, how="inner", on="fund_id")
    data = data.sort_values(by=["org_id", "fund_id", "statistic_date"],
                            ascending=[True, True, False])
    # Earliest statistic_date per organisation, as dates and as timestamps.
    t_mins = data.groupby(["org_id"])["statistic_date"].min().tolist()
    t_mins_tstp = [time.mktime(x.timetuple()) for x in t_mins]
    data["statistic_date"] = data["statistic_date"].apply(
        lambda x: time.mktime(x.timetuple()))
    data.index = range(len(data))

    ids_o = data["org_id"].drop_duplicates().tolist()
    names_o = data.drop_duplicates(subset=["org_id"])["org_name"].tolist()
    idx4slice_o = su.idx4slice(data, "org_id")
    # One sub-frame of `data` per consecutive pair of slice positions.
    dfs = [
        data[idx4slice_o[i]:idx4slice_o[i + 1]]
        if i != len(idx4slice_o) - 1 else data[idx4slice_o[i]:]
        for i in range(len(idx4slice_o) - 1)

    # Preprocess
    # Standard series
    t_stds = [
                          extend=1) for interval in intervals
    t_std_y5 = t_stds[6]
    t_stds_len = [len(x) - 1 for x in t_stds]

    # Fund standard series, since inception
    t_std_alls = [
                          tu.periods_in_interval(, t_min, 12),
                          extend=6) for t_min in t_mins
    ]  # standard series, since inception
    # Keep the entries that are >= t_min plus one more entry.
    t_std_alls = [
        t_std_all[:len([x for x in t_std_all if x >= t_min]) + 1]
        for t_std_all, t_min in zip(t_std_alls, t_mins_tstp)

    # Benchmark index standard series, since inception
    matchs_bm = [
        tu.outer_match4indicator_m(t_bm, t_std_all, False)
        for t_std_all in t_std_alls
    # Element 1 of each match result is used as a mapping whose values are
    # positions into the benchmark lists (or None when unmatched).
    idx_matchs_bm = [x[1] for x in matchs_bm]
    price_bm0_all = [[
        prices_bm[0][ix] if ix is not None else None for ix in idx.values()
    ] for idx in idx_matchs_bm]
    price_bm1_all = [[
        prices_bm[1][ix] if ix is not None else None for ix in idx.values()
    ] for idx in idx_matchs_bm]
    price_bm2_all = [[
        prices_bm[2][ix] if ix is not None else None for ix in idx.values()
    ] for idx in idx_matchs_bm]
    price_bm3_all = [[
        prices_bm[3][ix] if ix is not None else None for ix in idx.values()
    ] for idx in idx_matchs_bm]
    price_bm4_all = [[
        prices_bm[4][ix] if ix is not None else None for ix in idx.values()
    ] for idx in idx_matchs_bm]

    matchs_pe = [
        tu.outer_match4indicator_m(t_pe, t_std_all, False)
        for t_std_all in t_std_alls
    idx_matchs_pe = [x[1] for x in matchs_pe]
    price_pe_all = [[
        price_pe[ix] if ix is not None else None for ix in idx.values()
    ] for idx in idx_matchs_pe]

    # Benchmark return series, since inception
    r_bm0_all = [fi.gen_return_series(x) for x in price_bm0_all]
    r_bm1_all = [fi.gen_return_series(x) for x in price_bm1_all]
    r_bm2_all = [fi.gen_return_series(x) for x in price_bm2_all]
    r_bm3_all = [fi.gen_return_series(x) for x in price_bm3_all]
    r_bm4_all = [fi.gen_return_series(x) for x in price_bm4_all]

    r_pe_all = [fi.gen_return_series(x) for x in price_pe_all]

    # Take the longest match mapping, read the treasury rate through it and
    # back-fill the gaps; `tmp` is the fallback used for unmatched positions
    # when building r_f_all below.
    tmp = [len(idx_matchs_bm[i]) for i in range(len(idx_matchs_bm))]
    tmp_id = tmp.index(max(tmp))
    tmp_list = [
        r_tbond[ix] if ix is not None else None
        for ix in idx_matchs_bm[tmp_id].values()
    tmp = pd.DataFrame(tmp_list)[0].fillna(method="backfill").tolist()

    # NOTE(review): tmp[k] indexes the fallback list by mapping key, so the
    # keys are presumably 0-based positions -- confirm against tu.
    r_f_all = [[
        r_tbond[idx[k]] if idx[k] is not None else tmp[k] for k in idx.keys()
    ] for idx in idx_matchs_bm]
    # Drop the first element of every risk-free series.
    r_f_all = [x[1:] for x in r_f_all]

    # Benchmark return series, per frequency
    matchs_bm = tu.outer_match4indicator_m(t_bm, t_std_y5,
                                           False)  # benchmark index standard series, since inception
    matchs_pe = tu.outer_match4indicator_m(t_pe, t_std_y5, False)
    # From here on idx_matchs_bm / idx_matchs_pe are single mappings rather
    # than lists of mappings.
    idx_matchs_bm = matchs_bm[1]
    idx_matchs_pe = matchs_pe[1]
    price_bm0_y5 = [
        prices_bm[0][ix] if ix is not None else None
        for ix in idx_matchs_bm.values()
    price_bm1_y5 = [
        prices_bm[1][ix] if ix is not None else None
        for ix in idx_matchs_bm.values()
    price_bm2_y5 = [
        prices_bm[2][ix] if ix is not None else None
        for ix in idx_matchs_bm.values()
    price_bm3_y5 = [
        prices_bm[3][ix] if ix is not None else None
        for ix in idx_matchs_bm.values()
    price_bm4_y5 = [
        prices_bm[4][ix] if ix is not None else None
        for ix in idx_matchs_bm.values()

    price_pe_y5 = [
        price_pe[ix] if ix is not None else None
        for ix in idx_matchs_pe.values()

    # Benchmark return series, per frequency
    r_bm0_y5 = fi.gen_return_series(price_bm0_y5)
    r_bm1_y5 = fi.gen_return_series(price_bm1_y5)
    r_bm2_y5 = fi.gen_return_series(price_bm2_y5)
    r_bm3_y5 = fi.gen_return_series(price_bm3_y5)
    r_bm4_y5 = fi.gen_return_series(price_bm4_y5)
    r_pe_y5 = fi.gen_return_series(price_pe_y5)

    r_f_y5 = [
        r_tbond[ix] if ix is not None else None
        for ix in idx_matchs_bm.values()
    r_f_y5 = r_f_y5[1:]

    # One truncated copy of each series per standard-series length.
    rs_bm0 = [r_bm0_y5[:length - 1] for length in t_stds_len]
    rs_bm1 = [r_bm1_y5[:length - 1] for length in t_stds_len]
    rs_bm2 = [r_bm2_y5[:length - 1] for length in t_stds_len]
    rs_bm3 = [r_bm3_y5[:length - 1] for length in t_stds_len]
    rs_bm4 = [r_bm4_y5[:length - 1] for length in t_stds_len]

    rs_pe = [r_pe_y5[:length - 1] for length in t_stds_len]
    rs_f = [r_f_y5[:length - 1] for length in t_stds_len]

    # Benchmark code -> per-interval return series (there is no key 5).
    benchmark = {
        1: rs_bm0,
        2: rs_bm1,
        3: rs_bm2,
        4: rs_pe,
        6: rs_bm3,
        7: rs_bm4
    benchmark_all = {
        1: r_bm0_all,
        2: r_bm1_all,
        3: r_bm2_all,
        4: r_pe_all,
        6: r_bm3_all,
        7: r_bm4_all

    # One iteration per organisation.
    for i in range(len(ids_o)):
        df = dfs[i]
        df.index = range(len(df))
        idx4slice = su.idx4slice(df, "fund_id")
        navs = su.slice(df, idx4slice, "nav")
        t_reals = su.slice(df, idx4slice, "statistic_date")

        matchs_all = [
            tu.outer_match4indicator_m(t_real, t_std_alls[i], drop_none=False)
            for t_real in t_reals
        idx_matchs_all = [x[1] for x in matchs_all]
        # NOTE(review): the np.NaN alias was removed in NumPy 2.0 (np.nan is
        # the supported spelling) -- check the pinned NumPy version.
        nav_matchs_all = [[
            nav[ix] if ix is not None else np.NaN for ix in idx.values()
        ] for nav, idx in zip(navs, idx_matchs_all)]

        # After the transpose: one row per standard date, one column per fund.
        nv_matrix = np.array(nav_matchs_all).T
        # NaN-ignoring mean across funds of row[k] / row[k + 1] - 1.
        r_total = np.nanmean((nv_matrix[:-1] / nv_matrix[1:] - 1), axis=1)
        # Cumulative product taken over the reversed returns, then reversed
        # back to the original order.
        price_total = np.nancumprod(1 + r_total[::-1])[::-1].tolist()
        price_total.append(1)  # define the base-period pseudo price as 1
        r_total = fi.gen_return_series(price_total)

        prices = []
        for j in range(7):
            if t_mins[i] + relativedelta(months=intervals[j]) <=
                length = min(len(price_total), t_stds_len[j])

        for j in range(7, 11):
            length = min(len(price_total), t_stds_len[j])

        navs2 = [prices[i] for i in intervals5]
        navs3 = [prices[i] for i in intervals6]
        rs2 = [fi.gen_return_series(x) for x in navs2]
        rs3 = [fi.gen_return_series(x) for x in navs3]

        rs_f_ = rs_f.copy()
        rs_f2_ = [rs_f_[i] for i in intervals5]
        rs_f3_ = [rs_f_[i] for i in intervals6]

        for k in benchmark.keys():
            rs_bm_ = benchmark[k].copy()  # the selected benchmark
            # These rebind the function-level names rs_bm2 / rs_bm3; the
            # `benchmark` dict still holds the original lists.
            rs_bm2 = [rs_bm_[i] for i in intervals5]
            rs_bm3 = [rs_bm_[i] for i in intervals6]

            s_time = [
                fi.competency_timing(r, r_bm, r_f)
                for r, r_bm, r_f in zip(rs3, rs_bm3, rs_f3_)
            s_security = [
                fi.competency_stock(r, r_bm, r_f)
                for r, r_bm, r_f in zip(rs3, rs_bm3, rs_f3_)
            persistence = [
                fi.persistence_er(r, r_bm) for r, r_bm in zip(rs2, rs_bm2)
            odds = [fi.odds(r, r_bm) for r, r_bm in zip(rs2, rs_bm2)]

            tmp = [odds, persistence, s_time, s_security]
            result_i = [
                ids_o[i], names_o[i], k, 1, 1, nv_matrix.shape[1], 60001,
                "全产品", 6000101, "全产品",
            for x in tmp:

    df = pd.DataFrame(result)
    # Positional columns 11..40 are cast to float64 and rounded to 6 decimals.
    df[list(range(11, 41))] = df[list(range(11, 41))].astype(np.float64)
    df[list(range(11, 41))] = df[list(range(11,
                                            41))].apply(lambda x: round(x, 6))
    df.columns = columns
    # NOTE(review): DataFrame.append was removed in pandas 2.0 (pd.concat is
    # the replacement) -- check the pinned pandas version.
    df_whole = df_whole.append(df)

    return df_whole
Esempio n. 6
# NOTE(review): top-level script excerpt with missing tokens -- the `dict(`
# and `zip(` calls are unclosed, a comparison operand is missing, and the
# `try:` matching the final `except` is not present. Restore from the
# upstream source before running.

# Per-fund value (`fnvdate`), keyed by fund id.
tmins = dict(
for fid, fnvdate in zip(d_fund_firstnvdate["fund_id"],
    tmins[fid] = fnvdate

d_fund_nv = pd.read_sql(sql_fund_nv, conn)
d_fund_nv = d_fund_nv.dropna()
# Drop rows whose statistic_date is below a cut-off (the cut-off expression is
# truncated in this excerpt).
d_fund_nv = d_fund_nv.drop(
    d_fund_nv.loc[d_fund_nv["statistic_date"] <, 1, 2)].index)
d_fund_nv["statistic_date"] = d_fund_nv["statistic_date"].apply(su.date2tstp)
d_fund_nv.index = range(len(d_fund_nv))

# Per-fund slices of nav and statistic_date, plus the distinct fund ids in
# first-appearance order.
idxs = su.idx4slice(d_fund_nv, "fund_id")
navs = su.slice(d_fund_nv, idxs, "nav")
ts = su.slice(d_fund_nv, idxs, "statistic_date")
ids = d_fund_nv["fund_id"].drop_duplicates().tolist()

t_std_totals = []
for id_ in ids:
        t_std_total = tu.timeseries_std(date_s,
                                            date_s, tmins[id_], 12),
    except Exception as e:
Esempio n. 7
def cal_std(fund_ids=None, whole=False):
    # Read nav rows from fund_nv_data_standard, filter them, match each fund's
    # observations to weekly ("w") and monthly ("m") standard series, and
    # return {"w": DataFrame, "m": DataFrame} built via merge_result().
    #
    # NOTE(review): this excerpt is syntactically incomplete -- the docstring
    # quotes, at least one `else:` line, several call heads and closing
    # brackets are missing. `conn`, `yesterday`, `friday`, `get_creterias` and
    # `merge_result` are not defined in the visible code.
        fund_ids: str


    # NOTE(review): as shown, the second assignment always overwrites the
    # first; an `else:` between them appears to have been lost.
    if whole is True:
        sql_navs = "SELECT fund_id, nav, added_nav, swanav, statistic_date FROM fund_nv_data_standard"
        sql_navs = "SELECT fund_id, nav, added_nav, swanav, statistic_date FROM fund_nv_data_standard \
                    WHERE update_time >= {ut}".format(ut=yesterday)

    # NOTE(review): same pattern here (both `+=` run as shown). The query is
    # also assembled by string formatting from `fund_ids`; confirm the caller
    # never passes untrusted text.
    if fund_ids:
        if 'WHERE' in sql_navs:
            sql_navs += " AND fund_id in ({})".format(fund_ids)
            sql_navs += " WHERE fund_id in ({})".format(fund_ids)

    su.tic("Fetching nv Data......")
    df_nav = pd.read_sql(sql_navs, conn)

    # Row filters: nav and added_nav of at least 0.2, and statistic_date
    # within a date range (both bounds are truncated in this excerpt).
    criterias = [(df_nav["nav"] >= 0.2), (df_nav["added_nav"] >= 0.2),
                 (df_nav["statistic_date"] >=, 1, 2)),
                 (df_nav["statistic_date"] <=]

    criteria = get_creterias(criterias)
    # Newest date first within each fund.
    df_nav = df_nav.loc[criteria].sort_values(["fund_id", "statistic_date"],
                                              ascending=[True, False])
    df_nav.index = range(len(df_nav))
    ids = df_nav["fund_id"].drop_duplicates().tolist()

    # Earliest / latest statistic_date per fund, as dates and as timestamps.
    t_mins = list(df_nav.groupby("fund_id")["statistic_date"].min())
    t_mins_tstp = [time.mktime(x.timetuple()) for x in t_mins]
    t_maxs = list(df_nav.groupby("fund_id")["statistic_date"].max())
    t_maxs_tstp = [time.mktime(x.timetuple()) for x in t_maxs]

    idx4slice = su.idx4slice(df_nav, slice_by="fund_id")
    navs = su.slice(df_nav, idx4slice, "nav")
    added_navs = su.slice(df_nav, idx4slice, "added_nav")
    swanavs = su.slice(df_nav, idx4slice, "swanav")
    t_reals = su.slice(df_nav, idx4slice, "statistic_date")
    t_reals_tstp = []
    for t_real in t_reals:
        t_reals_tstp.append([time.mktime(x.timetuple()) for x in t_real])

    # Weekly standard series per fund.
    t_std_alls_w = [
                          tu.periods_in_interval(friday, t_min, 12),
                          extend=4) for t_min in t_mins
    ]  # standard series, since inception
    # Keep the entries that are >= t_min plus one more entry ...
    t_std_alls_w = [
        t_std_all[:len([x for x in t_std_all if x >= t_min]) + 1]
        for t_std_all, t_min in zip(t_std_alls_w, t_mins_tstp)
    # ... then keep a tail sized by the number of entries < t_max, plus one.
    t_std_alls_w = [
        t_std_all[-len([x for x in t_std_all if x < t_max]) - 1:]
        for t_std_all, t_max in zip(t_std_alls_w, t_maxs_tstp)

    # Monthly standard series per fund.
    t_std_alls_m = [
                          tu.periods_in_interval(date, t_min, 12),
                          extend=6) for date, t_min in zip(t_maxs, t_mins)
    ]  # standard series, since inception
    t_std_alls_m = [
        t_std_all[:len([x for x in t_std_all if x >= t_min]) + 1]
        for t_std_all, t_min in zip(t_std_alls_m, t_mins_tstp)

    # Weekly matching: element 1 of each match result is used as a mapping
    # whose values are positions into the fund's own lists (or None).
    matchs_w = [
        tu.outer_match4indicator_w(t_real, t_std)
        for t_real, t_std in zip(t_reals_tstp, t_std_alls_w)
    idx_matchs_w = [x[1] for x in matchs_w]
    nav_matchs_w = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(navs, idx_matchs_w)]
    anav_matchs_w = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(added_navs, idx_matchs_w)]
    swanav_matchs_w = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(swanavs, idx_matchs_w)]
    t_matchs_w = [[
        t_real[ix] if ix is not None else None for ix in idx.values()
    ] for t_real, idx in zip(t_reals, idx_matchs_w)]
    t_matchs_std_w = [[:-1], "date") if x is not None else None for x in t_std_alls_w

    # Monthly matching, same shape as the weekly section above.
    matchs_m = [
        tu.outer_match4indicator_m(t_real, t_std)
        for t_real, t_std in zip(t_reals_tstp, t_std_alls_m)
    idx_matchs_m = [x[1] for x in matchs_m]
    nav_matchs_m = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(navs, idx_matchs_m)]
    anav_matchs_m = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(added_navs, idx_matchs_m)]
    swanav_matchs_m = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(swanavs, idx_matchs_m)]
    t_matchs_m = [[
        t_real[ix] if ix is not None else None for ix in idx.values()
    ] for t_real, idx in zip(t_reals, idx_matchs_m)]
    t_matchs_std_m = [[:-1], "date") if x is not None else None for x in t_std_alls_m

    # Column name -> list with one entry per fund, in the order of `ids`.
    result_w = {
        "fund_id": ids,
        "nav": nav_matchs_w,
        "added_nav": anav_matchs_w,
        "swanav": swanav_matchs_w,
        "statistic_date": t_matchs_w,
        "statistic_date_std": t_matchs_std_w

    result_m = {
        "fund_id": ids,
        "nav": nav_matchs_m,
        "added_nav": anav_matchs_m,
        "swanav": swanav_matchs_m,
        "statistic_date": t_matchs_m,
        "statistic_date_std": t_matchs_std_m

    su.tic("Merging Result......")
    result = {}
    result["w"] = pd.DataFrame.from_dict(merge_result(result_w, ids))
    result["m"] = pd.DataFrame.from_dict(merge_result(result_m, ids))

    return result