コード例 #1
0
def calculate(idx):
    dfs = pd.DataFrame()

    PEIndex = sf.PEIndex(idx)
    first_year = PEIndex.firstyear

    result_r = {}
    components_num = {}

    for year in range(first_year, process_date.year + 1):
        if year == process_date.timetuple().tm_year:
            month = process_date.month
        else:
            month = 12

        sql_i = sf.SQL_PEIndex(PEIndex.idx, year).yeardata_m

        conn = engine_rd.connect()

        date_s = dt.date(year, month, 1) - dt.timedelta(1)

        su.tic("Getting Data")
        d = pd.read_sql(sql_i, conn)
        conn.close()

        su.tic("Preprocessing...")
        d["statistic_date"] = d["statistic_date"].apply(
            lambda x: time.mktime(x.timetuple()))
        d_dd = d.drop_duplicates("fund_id")
        idx_slice = d_dd.index.tolist()
        idx_slice.append(len(d))

        t_std = tu.timeseries_std(dt.datetime(year, month, 10),
                                  month,
                                  12,
                                  1,
                                  use_lastday=True)
        t_std1 = t_std[:-1]

        su.tic("Grouping...")
        ds = [
            d[idx_slice[i]:idx_slice[i + 1]]
            for i in range(len(idx_slice) - 1)
        ]
        ts = [x["statistic_date"].tolist() for x in ds]
        navs = [x["nav"].tolist() for x in ds]

        su.tic("Matching...")
        matchs1 = [tu.outer_match4index_f7(x, t_std1, False) for x in ts]
        matchs2 = [tu.outer_match4index_b7(x, t_std1) for x in ts]
        matchs3 = [tu.outer_match4index_m(x, t_std, False) for x in ts]
        matchs = [
            su.merge_result(x1, x2, x3)
            for x1, x2, x3 in zip(matchs1, matchs2, matchs3)
        ]

        su.tic("Getting Result...")
        t_matchs = [x[0] for x in matchs]
        t_matchs = [tu.tr(x) for x in t_matchs]
        idx_matchs = [x[1] for x in matchs]
        nav_matchs = [[
            navs[i][idx] if idx is not None else None
            for idx in idx_matchs[i].values()
        ] for i in range(len(idx_matchs))]

        su.tic("Calculating Index...")
        nvs = pd.DataFrame(nav_matchs).T.astype(float).as_matrix()
        rs = nvs[:-1] / nvs[1:] - 1
        rs[rs > 30] = np.nan
        rs[rs < -1] = np.nan
        r = np.nanmean(rs, axis=1)
        r[np.isnan(r)] = 0

        result_r[year] = r
        components_num[year] = np.sum(~np.isnan(rs), axis=1)
        su.tic("Year:{0}, Done...".format(year))

    values_r = []
    values_num = []
    for year in range(first_year, process_date.timetuple().tm_year + 1):
        if len(values_r) == 0:
            values_r = result_r[year].tolist()[::-1]
            values_num = components_num[year].tolist()[::-1]
        else:
            values_r.extend(result_r[year].tolist()[::-1])
            values_num.extend(components_num[year].tolist()[::-1])

    adjust_periods = 1
    date_tmp = date_s - relativedelta.relativedelta(months=adjust_periods + 1)
    date_tmp = dt.date(date_tmp.year, date_tmp.month,
                       cld.monthrange(date_tmp.year, date_tmp.month)[1])
    sql_base = "SELECT index_value FROM fund_month_index_static WHERE index_id = '{idx_id}' \
    AND statistic_date = '{sd}'".format(idx_id=PEIndex.id, sd=date_tmp)
    base = pd.read_sql(sql_base, engine_rd).get_value(0, "index_value")

    result = (np.array(values_r)[-(adjust_periods + 1) - 1:] +
              1).cumprod() * base
    result = result.tolist()
    values_num = values_num[-(adjust_periods + 1) - 1:]

    tag = tu.timeseries_std(
        dt.datetime(year, month + 1, 10),
        tu.periods_in_interval(dt.datetime(year, month + 1, 10),
                               dt.datetime(first_year, 1, 10), 12), 12)[::-1]
    tag = [dt.date.fromtimestamp(x - 864000) for x in tag]
    tag = tag[-(adjust_periods + 1) - 1:]

    op = pd.DataFrame(list(zip(tag, result, values_num)))
    op.columns = ["statistic_date", "index_value", "funds_num"]

    cols = [
        "index_id", "index_name", "typestandard_code", "typestandard_name",
        "type_code", "type_name", "stype_code", "stype_name", "index_method",
        "data_source", "data_source_name"
    ]
    values = [
        PEIndex.id, PEIndex.name, PEIndex.typestandard["code"],
        PEIndex.typestandard["name"], PEIndex.type["code"],
        PEIndex.type["name"], PEIndex.stype["code"], PEIndex.stype["name"], 1,
        0, "私募云通"
    ]
    col_dict = dict(zip(cols, values))
    for col, val in col_dict.items():
        op[col] = val

    dfs = dfs.append(op[:-1])
    return dfs
コード例 #2
0
ファイル: fund_index_03_w.py プロジェクト: dxcv/fund
    idx_slice.append(len(d))
    ids = d_dd["fund_id"].tolist()

    last_monday = date_s - dt.timedelta(
        cld.weekday(date_s.year, date_s.month, date_s.day))  #
    # t_std = T.timeseries_std(last_monday, T.periods_in_interval(last_monday, dt.date(year-1,12,31), 12))    #
    t_std = TU.timeseries_std(last_monday, "y", 52, extend=1)  #
    if year == first_date.timetuple().tm_year: t_std = t_std[:-1]

    #
    tic("Slicing")
    t_std_long = TU.timeseries_std(
        last_monday,
        TU.periods_in_interval(last_monday, dt.date(year - 1, 11, 30), 12))
    t_std_long_p1m = [(x + relativedelta(months=1)).timestamp()
                      for x in TU.tr(t_std_long)]
    real_p1m = compare(t_min, t_std_long)  # 实际最早日期和标准序列日期比较
    p1m_std = compare(t_std_long_p1m, t_std)  # 加一个月的标准序列日期和标准序列日期比较
    data_used = [p1m_std[x - 1] for x in real_p1m]

    tic("Grouping...")
    ds = [d[idx_slice[i]:idx_slice[i + 1]] for i in range(len(idx_slice) - 1)]
    ts = [x["statistic_date"].tolist() for x in ds]
    navs = [x["nav"].tolist() for x in ds]

    tic("Matching...")
    matchs = [TU.outer_match4index_w(x, t_std, False) for x in ts]

    tic("Getting Result...")
    t_matchs = [x[0] for x in matchs]
    t_matchs = [TU.tr(x) for x in t_matchs]
コード例 #3
0
ファイル: fund_index_all_w.py プロジェクト: dxcv/fund
def calculate(idx, export_path=None):
    dfs = pd.DataFrame()

    PEIndex = sf.PEIndex(idx)
    first_date = PEIndex.firstmonday

    result_r = {}
    components_num = {}
    components = {}
    for year in range(first_date.timetuple().tm_year, _process_date.year + 1):

        if year == _process_date.year:
            month = _process_date.month
            day = tu.date_of_weekday(_process_date, 0, (0, 0)).day
            if day > _process_date.day:  # 修正周一在上个月,跨月后产生的日期问题
                month -= 1
        else:
            month = 12
            day = 31

        date_s = dt.date(year, month, day)  #

        sql_i = sf.SQL_PEIndex(idx, year).yeardata_w["nv"]
        sql_mindate = sf.SQL_PEIndex(idx, year).yeardata_w["t_min"]

        conn = engine_rd.connect()

        su.tic("Getting Data")
        d = pd.read_sql(sql_i, conn)
        d.index = range(len(d))

        t_min = pd.read_sql(sql_mindate,
                            conn)["statistic_date_earliest"].tolist()
        t_min = [time.mktime(x.timetuple()) for x in t_min]  #
        conn.close()

        su.tic("Preprocessing...")
        d["statistic_date"] = d["statistic_date"].apply(
            lambda x: time.mktime(x.timetuple()))
        d_dd = d.drop_duplicates("fund_id")
        idx_slice = d_dd.index.tolist()
        idx_slice.append(len(d))
        ids = d_dd["fund_id"].tolist()

        last_monday = date_s - dt.timedelta(
            cld.weekday(date_s.year, date_s.month, date_s.day))  #
        t_std = tu.timeseries_std(last_monday, "a", 52, extend=1)  #
        if year == first_date.timetuple().tm_year:
            t_std = t_std[:-1]

        #
        su.tic("Slicing")
        t_std_long = tu.timeseries_std(
            last_monday,
            tu.periods_in_interval(last_monday, dt.date(year - 1, 11, 30), 12))
        t_std_long_p1m = [(x + relativedelta(months=1)).timestamp()
                          for x in tu.tr(t_std_long)]
        real_p1m = su.compare(t_min, t_std_long)  # 实际最早日期和标准序列日期比较
        p1m_std = su.compare(t_std_long_p1m, t_std)  # 加一个月的标准序列日期和标准序列日期比较
        data_used = [p1m_std[x - 1] for x in real_p1m]

        su.tic("Grouping...")
        ds = [
            d[idx_slice[i]:idx_slice[i + 1]]
            for i in range(len(idx_slice) - 1)
        ]
        ts = [x["statistic_date"].tolist() for x in ds]
        navs = [x["nav"].tolist() for x in ds]

        su.tic("Matching...")
        matchs = [tu.outer_match4index_w(x, t_std, False) for x in ts]

        su.tic("Getting Result...")
        # t_matchs = [x[0] for x in matchs]
        # t_matchs = [tu.tr(x) for x in t_matchs]
        idx_matchs = [x[1] for x in matchs]
        nav_matchs = [[
            navs[i][idx] if idx is not None else None
            for idx in idx_matchs[i].values()
        ] for i in range(len(idx_matchs))]

        su.tic("Calculating Index...")
        nvs = pd.DataFrame(nav_matchs).T.astype(float).as_matrix()
        print(nvs.shape)
        for i in range(len(ids)):
            nvs[data_used[i] + 1:, i] = np.nan

        rs = nvs[:-1] / nvs[1:] - 1
        rs[rs > 10] = np.nan
        rs[rs < -1] = np.nan
        r = np.nanmean(rs, axis=1)
        r[np.isnan(r)] = 0

        result_r[year] = r
        components_num[year] = np.sum(~np.isnan(rs), axis=1)

        # log samples
        tmp = pd.DataFrame(nvs, columns=ids).T
        tmp["fund_id"] = tmp.index
        tmp = tmp[[tmp.columns[-1], *tmp.columns[:-1]]]
        components[year] = tmp

        su.tic("Year:{0}, Done...".format(year))
    values_r = []
    values_num = []
    for year in range(first_date.timetuple().tm_year, date_s.year + 1):
        if len(values_r) == 0:
            values_r = result_r[year].tolist()[::-1]
            values_num = components_num[year].tolist()[::-1]
        else:
            values_r.extend(result_r[year].tolist()[::-1])
            values_num.extend(components_num[year].tolist()[::-1])

    result = (np.array(values_r) + 1).cumprod() * 1000
    result = result.tolist()
    result.insert(0, 1000)
    values_num.insert(0, 0)

    tag = tu.timeseries_std(dt.datetime(year, month, day),
                            tu.periods_in_interval(
                                dt.datetime(year, month, day),
                                dt.datetime(first_date.year, 1, 1), 12),
                            52,
                            extend=5)[::-1]
    tag = [x for x in tag if x >= first_date.timestamp()]
    tag = [dt.date.fromtimestamp(x) for x in tag]

    # local debug
    op = pd.DataFrame(list(zip(tag, result, values_num)))
    op.columns = ["statistic_date", "index_value", "funds_num"]

    cols = [
        "index_id", "index_name", "typestandard_code", "typestandard_name",
        "type_code", "type_name", "stype_code", "stype_name", "index_method",
        "data_source", "data_source_name"
    ]
    values = [
        PEIndex.id, PEIndex.name, PEIndex.typestandard["code"],
        PEIndex.typestandard["name"], PEIndex.type["code"],
        PEIndex.type["name"], PEIndex.stype["code"], PEIndex.stype["name"], 1,
        0, "私募云通"
    ]
    col_dict = dict(zip(cols, values))
    for col, val in col_dict.items():
        op[col] = val

    dfs = dfs.append(op)

    if export_path is not None:
        tmp = tag.copy()
        for year in sorted(components.keys(), reverse=True):
            # print(year, len(tmp))
            components[year].columns = [
                "fund_id",
                *[tmp.pop() for i in range(len(components[year].columns) - 2)],
                tmp[-1]
            ]

        io.export_to_xl(
            components, "{sd}_{index_name}_w_samples".format(
                sd=last_monday.strftime("%Y%m%d"), index_name=PEIndex.id),
            export_path)

    return dfs
コード例 #4
0
def calculate():
    conn = engine_rd.connect()

    year, month = yesterday.year, yesterday.month
    month_range = cld.monthrange(year, month)[1]
    time_to_fill = sf.Time(dt.datetime(year, month, month_range))
    year, month = time_to_fill.year, time_to_fill.month

    bms_used = [
        "hs300", "csi500", "sse50", "ssia", "cbi", "y1_treasury_rate", "nfi"
    ]
    sql_bm = sf.SQL.market_index(date=time_to_fill.today,
                                 benchmarks=bms_used,
                                 whole=True)  # Get benchmark prices
    bm = pd.read_sql(sql_bm, conn)
    # bm.loc[bm["statistic_date"] == dt.date(1995, 8, 16), "y1_treasury_rate"] = 2.35

    bm["y1_treasury_rate"] = bm["y1_treasury_rate"].fillna(method="backfill")
    bm["y1_treasury_rate"] = bm["y1_treasury_rate"].apply(su.annually2weekly)
    bm["statistic_date"] = bm["statistic_date"].apply(su.date2tstp)

    prices_bm = [
        bm.dropna(subset=[bm_name])[bm_name].tolist() for bm_name in bms_used
    ]
    ts_bm = [
        bm.dropna(subset=[bm_name])["statistic_date"].tolist()
        for bm_name in bms_used
    ]

    prices = prices_bm.copy()
    ts = ts_bm.copy()

    t_mins_pe_all = sf.PEIndex().firstmonday  # 寻找指数中可被计算的
    pesid_used = []
    for k in t_mins_pe_all:
        if t_mins_pe_all[k].year < year:
            pesid_used.append(k)
        elif t_mins_pe_all[k].year == year:
            if t_mins_pe_all[k].month <= month:
                pesid_used.append(k)
            else:
                continue
        else:
            continue

    prices_pe = []
    ts_pe = []
    pes_used = []
    for idx in pesid_used:
        PE = sf.PEIndex(idx)
        pes_used.append(PE.id)
        sql_pe = sf.SQL.pe_index(time_to_fill.today, index_id=PE.id, freq="w")
        pe = pd.read_sql(sql_pe, conn)
        pe["statistic_date"] = pe["statistic_date"].apply(su.date2tstp)
        prices_pe.append(pe["index_value"].tolist())
        ts_pe.append(pe["statistic_date"].tolist())
    conn.close()

    prices.extend(prices_pe)
    ts.extend(ts_pe)

    t_mins_tstp = [min(x) for x in ts]
    t_mins = tu.tr(t_mins_tstp)

    intervals = table.intervals
    intervals1 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
    intervals3 = [0, 1, 2, 3, 4, 5, 6, 10, 11]

    index_used = bms_used.copy()
    index_used.extend(pes_used)

    index_name = {
        "FI01": "私募全市场指数",
        "FI02": "阳光私募指数",
        "FI03": "私募FOF指数",
        "FI04": "股票多头策略私募指数",
        "FI05": "股票多空策略私募指数",
        "FI06": "市场中性策略私募指数",
        "FI07": "债券基金私募指数",
        "FI08": "管理期货策略私募指数",
        "FI09": "宏观策略私募指数",
        "FI10": "事件驱动策略私募指数",
        "FI11": "相对价值策略私募指数",
        "FI12": "多策略私募指数",
        "FI13": "组合投资策略私募指数",
        "hs300": "沪深300指数",
        "csi500": "中证500指数",
        "sse50": "上证50指数",
        "ssia": "上证A股指数",
        "cbi": "中债指数",
        "nfi": "南华商品指数",
        "y1_treasury_rate": "y1_treasury_rate"
    }

    result = []
    for mday in range(1, yesterday.day + 1):
        print("Day {0}: {1}".format(mday, dt.datetime.now()))

        date_s = sf.Time(dt.datetime(year, month,
                                     mday))  # Generate statistic_date

        #
        t_stds = [
            tu.timeseries_std(date_s.today, interval, 52, extend=1)
            for interval in intervals
        ]  # 标准序列
        t_std_lens = [len(x) - 1 for x in t_stds]  # 标准序列净值样本个数
        t_std_week = tu.timeseries_std(date_s.today, "w", 52, 1)  # 标准序列_本周
        ts_std_total = [
            tu.timeseries_std(date_s.today,
                              tu.periods_in_interval(date_s.today, t_min, 12),
                              extend=4) for t_min in t_mins
        ]  # 标准序列_成立以来
        ts_std_total = [
            t_std_total[:len([x for x in t_std_total if x >= t_min]) + 1]
            for t_std_total, t_min in zip(ts_std_total, t_mins_tstp)
        ]

        # 基准指数的标准序列_成立以来
        matchs = [
            tu.outer_match4indicator_w(t, t_std_all, False)
            for t, t_std_all in zip(ts, ts_std_total)
        ]
        idx_matchs = [x[1] for x in matchs]
        prices_total = [[
            price[ix] if ix is not None else None for ix in idx.values()
        ] for price, idx in zip(prices, idx_matchs)]

        # 基准指标的收益率_不同频率
        rs_total = [
            fi.gen_return_series(price_total) for price_total in prices_total
        ]

        # 无风险国债的收益率
        r_f_total = prices_total[5][
            1:]  # the list `y1_treasury_rate` in prices_total is not price, but return
        r_f_total = pd.DataFrame(r_f_total).fillna(
            method="backfill")[0].tolist()
        r_f_all = [r_f_total[:length - 1] for length in t_std_lens]
        r_f_all.append(r_f_total)

        # for i in range(len(index_used)):
        for i in range(len(index_used)):

            if index_name[index_used[i]] == "y1_treasury_rate": continue

            price_all = []
            r_all = []
            for j in range(7):
                if dt.date.fromtimestamp(
                    (t_mins[i] + relativedelta(months=intervals[j])
                     ).timestamp()) <= date_s.today:
                    price_all.append(prices_total[i][:t_std_lens[j]])
                    r_all.append(rs_total[i][:t_std_lens[j] - 1])
                else:
                    price_all.append([])
                    r_all.append([])
            for j in range(7, 11):
                price_all.append(prices_total[i][:t_std_lens[j]])
                if rs_total[i] is not None:
                    r_all.append(rs_total[i][:t_std_lens[j] - 1])
                else:
                    r_all.append([])

            price_all.append(prices_total[i])
            r_all.append(rs_total[i])
            price_all1 = [price_all[i] for i in intervals1]
            price_all3 = [price_all[i] for i in intervals3]
            r_all1 = [r_all[i] for i in intervals1]
            r_all3 = [r_all[i] for i in intervals3]

            r_f_all1 = [r_f_all[i] for i in intervals1][:-1]
            r_f_all3 = [r_f_all[i] for i in intervals3][:-1]
            r_f_all1.append(r_f_all[-1][:len(r_all[-1])])
            r_f_all3.append(r_f_all[-1][:len(r_all[-1])])

            ir = [fi.accumulative_return(price) for price in price_all1]
            ir_a = [fi.return_a(r) for r in r_all1]
            stdev_a = [fi.standard_deviation_a(r) for r in r_all3]
            dd_a = [
                fi.downside_deviation_a(r, r_f)
                for r, r_f in zip(r_all3, r_f_all3)
            ]
            mdd = [fi.max_drawdown(price)[0] for price in price_all3]
            sharpe_a = [
                fi.sharpe_a(r, r_f) for r, r_f in zip(r_all3, r_f_all3)
            ]
            calmar_a = [
                fi.calmar_a(price, r_f)
                for price, r_f in zip(price_all3, r_f_all3)
            ]
            sortino_a = [
                fi.sortino_a(r, r_f) for r, r_f in zip(r_all3, r_f_all3)
            ]
            p_earning_weeks = [fi.periods_positive_return(r) for r in r_all3]
            n_earning_weeks = [fi.periods_npositive_return(r) for r in r_all3]
            con_rise_weeks = [fi.periods_continuous_rise(r)[0] for r in r_all3]
            con_fall_weeks = [fi.periods_continuous_fall(r)[0] for r in r_all3]

            tmp = [
                ir, ir_a, stdev_a, dd_a, mdd, sharpe_a, calmar_a, sortino_a,
                p_earning_weeks, n_earning_weeks, con_rise_weeks,
                con_fall_weeks
            ]
            result_i = [index_used[i], index_name[index_used[i]], date_s.today]
            for x in tmp:
                result_i.extend(x)
            result.append(result_i)

    df = pd.DataFrame(result)
    df[list(range(3, 117))] = df[list(range(3, 117))].astype(np.float64)
    df[list(range(3, 117))] = df[list(range(3,
                                            117))].apply(lambda x: round(x, 6))
    df.columns = columns

    df.index_id = df.index_id.apply(lambda x: x.upper())
    return df
コード例 #5
0
def calculate(idx, export_path=None):
    dfs = pd.DataFrame()

    PEIndex = sf.PEIndex(idx)
    first_year = PEIndex.firstyear

    result_r = {}
    components_num = {}
    components = {}

    for year in range(first_year, process_date.year + 1):
        if year == process_date.timetuple().tm_year:
            month = process_date.month
        else:
            month = 12

        sql_i = sf.SQL_PEIndex(PEIndex.idx, year).yeardata_m

        conn = engine_rd.connect()

        su.tic("Getting Data")
        d = pd.read_sql(sql_i, conn)
        conn.close()

        su.tic("Preprocessing...")
        d["statistic_date"] = d["statistic_date"].apply(
            lambda x: time.mktime(x.timetuple()))
        d_dd = d.drop_duplicates("fund_id")
        idx_slice = d_dd.index.tolist()
        idx_slice.append(len(d))
        ids = d_dd["fund_id"].tolist()

        t_std = tu.timeseries_std(dt.datetime(year, month, 10),
                                  month,
                                  12,
                                  1,
                                  use_lastday=True)
        t_std1 = t_std[:-1]

        su.tic("Grouping...")
        ds = [
            d[idx_slice[i]:idx_slice[i + 1]]
            for i in range(len(idx_slice) - 1)
        ]
        ts = [x["statistic_date"].tolist() for x in ds]
        navs = [x["nav"].tolist() for x in ds]

        su.tic("Matching...")
        matchs1 = [tu.outer_match4index_f7(x, t_std1, False) for x in ts]
        matchs2 = [tu.outer_match4index_b7(x, t_std1) for x in ts]
        matchs3 = [tu.outer_match4index_m(x, t_std, False) for x in ts]
        matchs = [
            su.merge_result(x1, x2, x3)
            for x1, x2, x3 in zip(matchs1, matchs2, matchs3)
        ]

        su.tic("Getting Result...")
        t_matchs = [x[0] for x in matchs]
        t_matchs = [tu.tr(x) for x in t_matchs]
        idx_matchs = [x[1] for x in matchs]
        nav_matchs = [[
            navs[i][idx] if idx is not None else None
            for idx in idx_matchs[i].values()
        ] for i in range(len(idx_matchs))]

        su.tic("Calculating Index...")
        nvs = pd.DataFrame(nav_matchs).T.astype(float).as_matrix()
        rs = nvs[:-1] / nvs[1:] - 1
        rs[rs > 30] = np.nan
        rs[rs < -1] = np.nan
        r = np.nanmean(rs, axis=1)
        r[np.isnan(r)] = 0

        result_r[year] = r
        components_num[year] = np.sum(~np.isnan(rs), axis=1)

        # log samples
        tmp = pd.DataFrame(nvs, columns=ids).T
        tmp["fund_id"] = tmp.index
        tmp = tmp[[tmp.columns[-1], *tmp.columns[:-1]]]
        components[year] = tmp

        su.tic("Year:{0}, Done...".format(year))

    values_r = []
    values_num = []
    for year in range(first_year, process_date.timetuple().tm_year + 1):
        if len(values_r) == 0:
            values_r = result_r[year].tolist()[::-1]
            values_num = components_num[year].tolist()[::-1]
        else:
            values_r.extend(result_r[year].tolist()[::-1])
            values_num.extend(components_num[year].tolist()[::-1])

    result = (np.array(values_r) + 1).cumprod() * 1000
    result = result.tolist()
    result.insert(0, 1000)
    values_num.insert(0, 0)

    # tag = tu.timeseries_std(dt.datetime(year, month + 1, 10),
    #                         tu.periods_in_interval(dt.datetime(year, month + 1, 10), dt.datetime(first_year, 1, 10),
    #                                                12), 12)[::-1]
    tag = tu.timeseries_std(
        dt.datetime(year, month + 1, 10),
        tu.periods_in_interval(dt.datetime(year, month + 1, 10),
                               dt.datetime(first_year, 1, 10), 12), 12)[::-1]
    tag = [dt.date.fromtimestamp(x - 864000) for x in tag]

    op = pd.DataFrame(list(zip(tag, result, values_num)))
    op.columns = ["statistic_date", "index_value", "funds_num"]

    cols = [
        "index_id", "index_name", "typestandard_code", "typestandard_name",
        "type_code", "type_name", "stype_code", "stype_name", "index_method",
        "data_source", "data_source_name"
    ]
    values = [
        PEIndex.id, PEIndex.name, PEIndex.typestandard["code"],
        PEIndex.typestandard["name"], PEIndex.type["code"],
        PEIndex.type["name"], PEIndex.stype["code"], PEIndex.stype["name"], 1,
        0, "私募云通"
    ]
    col_dict = dict(zip(cols, values))
    for col, val in col_dict.items():
        op[col] = val

    dfs = dfs.append(op[:-1])

    if export_path is not None:
        tmp = tag.copy()
        for year in sorted(components.keys(), reverse=True):
            print(year, len(tmp))
            components[year].columns = [
                "fund_id",
                *[tmp.pop() for i in range(len(components[year].columns) - 2)],
                tmp[-1]
            ]
        io.export_to_xl(
            components,
            "{sd}_{index_name}_m_samples".format(sd=tag[-2].strftime("%Y%m%d"),
                                                 index_name=PEIndex.id),
            export_path)

    return dfs
コード例 #6
0
def cal_std(fund_ids=None, whole=False):
    """
    每次计算一个基金
    Args:
        fund_ids: str
        whole:

    Returns:

    """
    if whole is True:
        sql_navs = "SELECT fund_id, nav, added_nav, swanav, statistic_date FROM fund_nv_data_standard"
    else:
        sql_navs = "SELECT fund_id, nav, added_nav, swanav, statistic_date FROM fund_nv_data_standard \
                    WHERE update_time >= {ut}".format(ut=yesterday)

    if fund_ids:
        if 'WHERE' in sql_navs:
            sql_navs += " AND fund_id in ({})".format(fund_ids)
        else:
            sql_navs += " WHERE fund_id in ({})".format(fund_ids)

    su.tic("Fetching nv Data......")
    df_nav = pd.read_sql(sql_navs, conn)

    criterias = [(df_nav["nav"] >= 0.2), (df_nav["added_nav"] >= 0.2),
                 (df_nav["statistic_date"] >= dt.date(1970, 1, 2)),
                 (df_nav["statistic_date"] <= dt.date.today())]

    su.tic("Preprocessing......")
    criteria = get_creterias(criterias)
    df_nav = df_nav.loc[criteria].sort_values(["fund_id", "statistic_date"],
                                              ascending=[True, False])
    df_nav.index = range(len(df_nav))
    ids = df_nav["fund_id"].drop_duplicates().tolist()

    t_mins = list(df_nav.groupby("fund_id")["statistic_date"].min())
    t_mins_tstp = [time.mktime(x.timetuple()) for x in t_mins]
    t_maxs = list(df_nav.groupby("fund_id")["statistic_date"].max())
    t_maxs_tstp = [time.mktime(x.timetuple()) for x in t_maxs]

    idx4slice = su.idx4slice(df_nav, slice_by="fund_id")
    navs = su.slice(df_nav, idx4slice, "nav")
    added_navs = su.slice(df_nav, idx4slice, "added_nav")
    swanavs = su.slice(df_nav, idx4slice, "swanav")
    t_reals = su.slice(df_nav, idx4slice, "statistic_date")
    t_reals_tstp = []
    for t_real in t_reals:
        t_reals_tstp.append([time.mktime(x.timetuple()) for x in t_real])

    t_std_alls_w = [
        tu.timeseries_std(friday,
                          tu.periods_in_interval(friday, t_min, 12),
                          extend=4) for t_min in t_mins
    ]  # 标准序列_成立以来
    t_std_alls_w = [
        t_std_all[:len([x for x in t_std_all if x >= t_min]) + 1]
        for t_std_all, t_min in zip(t_std_alls_w, t_mins_tstp)
    ]
    t_std_alls_w = [
        t_std_all[-len([x for x in t_std_all if x < t_max]) - 1:]
        for t_std_all, t_max in zip(t_std_alls_w, t_maxs_tstp)
    ]

    t_std_alls_m = [
        tu.timeseries_std(date,
                          tu.periods_in_interval(date, t_min, 12),
                          periods_y=12,
                          use_lastday=True,
                          extend=6) for date, t_min in zip(t_maxs, t_mins)
    ]  # 标准序列_成立以来
    t_std_alls_m = [
        t_std_all[:len([x for x in t_std_all if x >= t_min]) + 1]
        for t_std_all, t_min in zip(t_std_alls_m, t_mins_tstp)
    ]

    su.tic("Matching......")
    matchs_w = [
        tu.outer_match4indicator_w(t_real, t_std)
        for t_real, t_std in zip(t_reals_tstp, t_std_alls_w)
    ]
    idx_matchs_w = [x[1] for x in matchs_w]
    nav_matchs_w = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(navs, idx_matchs_w)]
    anav_matchs_w = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(added_navs, idx_matchs_w)]
    swanav_matchs_w = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(swanavs, idx_matchs_w)]
    t_matchs_w = [[
        t_real[ix] if ix is not None else None for ix in idx.values()
    ] for t_real, idx in zip(t_reals, idx_matchs_w)]
    t_matchs_std_w = [
        tu.tr(x[:-1], "date") if x is not None else None for x in t_std_alls_w
    ]

    matchs_m = [
        tu.outer_match4indicator_m(t_real, t_std)
        for t_real, t_std in zip(t_reals_tstp, t_std_alls_m)
    ]
    idx_matchs_m = [x[1] for x in matchs_m]
    nav_matchs_m = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(navs, idx_matchs_m)]
    anav_matchs_m = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(added_navs, idx_matchs_m)]
    swanav_matchs_m = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(swanavs, idx_matchs_m)]
    t_matchs_m = [[
        t_real[ix] if ix is not None else None for ix in idx.values()
    ] for t_real, idx in zip(t_reals, idx_matchs_m)]
    t_matchs_std_m = [
        tu.tr(x[:-1], "date") if x is not None else None for x in t_std_alls_m
    ]

    result_w = {
        "fund_id": ids,
        "nav": nav_matchs_w,
        "added_nav": anav_matchs_w,
        "swanav": swanav_matchs_w,
        "statistic_date": t_matchs_w,
        "statistic_date_std": t_matchs_std_w
    }

    result_m = {
        "fund_id": ids,
        "nav": nav_matchs_m,
        "added_nav": anav_matchs_m,
        "swanav": swanav_matchs_m,
        "statistic_date": t_matchs_m,
        "statistic_date_std": t_matchs_std_m
    }

    su.tic("Merging Result......")
    result = {}
    result["w"] = pd.DataFrame.from_dict(merge_result(result_w, ids))
    result["m"] = pd.DataFrame.from_dict(merge_result(result_m, ids))

    return result