Esempio n. 1
0
def is_daily(ids_to_update):
    """Classify funds whose nv data is published (roughly) daily.

    Args:
        ids_to_update: iterable of fund ids to examine.

    Returns:
        dict mapping fund_id -> "日度" for every fund whose count of nv
        records per elapsed day since its first record is >= 0.5; funds
        that do not qualify are omitted.
    """
    sql_fnds = "SELECT fund_id, statistic_date FROM fund_nv_data_standard WHERE statistic_date >= '19700101' \
                AND fund_id IN {ids}".format(ids=sf.SQL.ids4sql(ids_to_update))
    su.tic("Fetching get_data......")
    fnds = pd.read_sql(sql_fnds, engine_rd)
    fnds = fnds.dropna().sort_values(by=["fund_id", "statistic_date"],
                                     ascending=[True, False])
    fnds.index = range(len(fnds))
    # Convert dates to unix timestamps so spans can be computed in seconds.
    fnds["statistic_date"] = fnds["statistic_date"].apply(
        lambda x: time.mktime(x.timetuple()))

    su.tic("Calculating......")
    ids4slice = su.idx4slice(fnds, slice_by="fund_id")
    ids = fnds.drop_duplicates(subset=["fund_id"])["fund_id"].tolist()
    t_reals = su.slice(fnds, ids4slice, "statistic_date")

    def _ratio(t_real):
        # Dates are sorted descending, so t_real[0] is the latest one.
        if len(t_real) <= 1:
            return None
        span_days = (t_real[0] - t_real[-1]) / 86400
        # FIX: guard against duplicate statistic_date rows, which made the
        # span zero and raised ZeroDivisionError in the original code.
        if span_days <= 0:
            return None
        return (len(t_real) - 1) / span_days

    match_ratio = [_ratio(t_real) for t_real in t_reals]
    matched = [
        "日度" if (x is not None and x >= 0.5) else None for x in match_ratio
    ]
    result = dict([x for x in list(zip(ids, matched)) if x[1] is not None])
    return result
Esempio n. 2
0
def merge(ddls, cmts):
    """Attach Chinese names and comments to every DDL frame in *ddls*."""
    su.tic("merging results...")
    wanted_cols = ["name", "name_sc", "type", "default", "nullable", "comments", "is_pk"]
    for tb_name, frame in ddls.items():
        frame["name_sc"] = frame["name"].apply(lambda col: get_dictvalue(cmts, col, 0))
        frame["comments"] = frame["name"].apply(lambda col: get_dictvalue(cmts, col, 1))
        ddls[tb_name] = frame[wanted_cols]
    return ddls
Esempio n. 3
0
def main():
    """Fetch table DDLs, merge in column comments, export to a workbook."""
    server_name, db_name, file_name = get_source()
    engine_read = cfg.load_engine()[server_name]
    dfs = merge(get_ddl(db_name, engine_read), get_cmt())
    export(dfs, file_name=file_name)
    su.tic("done...")
Esempio n. 4
0
def fetch_data(sqls, conn):
    """Run every query in *sqls* against *conn*; results keyed by table name."""
    su.tic("fetching new get_data...")
    dfs = {}
    for table, query in sqls.items():
        su.tic("fetching get_data of `{tb_name}`...".format(tb_name=table))
        dfs[table] = pd.read_sql(query, conn)
    # Bundle the three monthly frames so they can be merged downstream.
    monthly_keys = (_tb["re_m"], _tb["ri_m"], _tb["sub_m"])
    dfs["m"] = [dfs[key] for key in monthly_keys]
    return dfs
Esempio n. 5
0
def delete_old(data, tb_name, conn):
    """Delete the rows of *tb_name* whose fund_id appears in *data*."""
    su.tic("delete old get_data...")
    fund_ids = data["fund_id"].tolist()
    sql_delete = "DELETE FROM {tb_name} WHERE fund_id IN {ids}".format(
        tb_name=tb_name, ids=sf.SQL.ids4sql(fund_ids))
    conn.execute(sql_delete)
Esempio n. 6
0
def merge_result(dfs, how="inner"):
    """Successively join the frames in *dfs* on fund/date/benchmark keys.

    Duplicate non-key columns from later frames get a "_dup" suffix.
    """
    su.tic("merge new get_data...")
    join_keys = ["fund_id", "statistic_date", "benchmark"]
    merged = dfs[0]
    for frame in dfs[1:]:
        merged = pd.merge(merged,
                          frame,
                          how=how,
                          on=join_keys,
                          suffixes=["", "_dup"])
    return merged
Esempio n. 7
0
def export(df_dict, file_name="ddls", path=su.get_desktop_path()):
    """Export every DataFrame in *df_dict* to one xlsx file, one sheet per key.

    Args:
        df_dict: mapping of sheet name -> DataFrame.
        file_name: output file name; ".xlsx" is appended when missing.
        path: destination directory (defaults to the desktop at import time).
    """
    su.tic("exporting...")
    file_path = os.path.join(path, file_name)
    if ".xlsx" not in file_path.lower():
        file_path += ".xlsx"
    # Write an empty workbook first so load_workbook below always succeeds.
    pd.DataFrame().to_excel(file_path, index=False)

    # FIX: load the workbook and build the writer ONCE instead of re-reading
    # and re-saving the whole file for every sheet (quadratic I/O in the
    # original); also dropped the stray `index=False` that was passed to
    # str.format for the sheet name instead of to to_excel.
    book = load_workbook(file_path)
    writer = pd.ExcelWriter(file_path, engine='openpyxl')
    writer.book = book
    writer.sheets = dict((ws.title, ws) for ws in book.worksheets)

    # Sheets are written in ascending key order, as before.
    for tb_name, frame in sorted(df_dict.items(), key=lambda d: d[0]):
        frame.fillna("").astype(str).to_excel(writer, tb_name, index=False)
    writer.save()
Esempio n. 8
0
def get_cmt():
    """Read column comments from the spec workbook.

    Returns:
        dict mapping column name -> (chinese_name, comment), merged across
        all sheets of the workbook.
    """
    su.tic("fetching comments...")

    # Raw string so the backslashes of the Windows path are taken literally.
    dfs = pd.read_excel(r"D:\Work\Others\项目\国泰君安项目\国泰君安项目表结构文件\私募产品数据库表结构(基础表)4.0版本.xls", sheetname=None, header=2)
    cmt_dict = {}
    for k, v in dfs.items():
        v = v.drop(v.columns[6:], axis=1)  # keep only the first six columns
        v.columns = ["name", "name_sc", "type", "is_pk", "default", "comment"]
        dfs[k] = v
        tmp = dict(zip(v["name"], list(zip(v["name_sc"], v["comment"]))))
        cmt_dict.update(tmp)
    # The spec workbook misspells "persistence" as "persietance"; alias the
    # correctly spelled keys to the same values (was 8 copy-pasted lines).
    for prefix in ("year", "total", "m3", "m6", "y1", "y2", "y3", "y5"):
        cmt_dict["{}_persistence".format(prefix)] = \
            cmt_dict["{}_persietance".format(prefix)]

    return cmt_dict
Esempio n. 9
0
def is_monthly(ids_to_update):
    """Classify funds whose nv data matches the monthly standard calendar.

    A fund qualifies when at least half of its standard monthly dates carry
    a real statistic_date; only funds with 3+ standard rows are considered.
    """
    su.tic("Fetching get_data......")
    sql_std_m = "SELECT fund_id, statistic_date_std, statistic_date FROM fund_nv_standard_m \
                 WHERE fund_id IN {ids} \
                 AND fund_id IN (SELECT fund_id FROM (SELECT fund_id, COUNT(fund_id) cnt FROM fund_nv_standard_m \
                 GROUP BY fund_id HAVING cnt >= 3) T)".format(
        ids=sf.SQL.ids4sql(ids_to_update))
    d_std_m = pd.read_sql(sql_std_m, engine_rd)

    ids4slice = su.idx4slice(d_std_m, slice_by="fund_id")
    ids = d_std_m.drop_duplicates(subset=["fund_id"])["fund_id"].tolist()
    real_series = su.slice(d_std_m, ids4slice, "statistic_date")
    std_series = su.slice(d_std_m, ids4slice, "statistic_date_std")

    result = {}
    for fund_id, t_real, t_std in zip(ids, real_series, std_series):
        filled = sum(1 for x in t_real if x is not None)
        if filled / len(t_std) >= 0.5:
            result[fund_id] = "月度"
    return result
Esempio n. 10
0
def get_ddl(schema, engine):
    """Reflect column definitions and primary keys for every table in *schema*.

    Returns:
        dict of table name -> DataFrame with columns name/type/nullable/
        default/is_pk, where is_pk is True for key columns and None otherwise.
    """
    su.tic("fetching ddl...")
    insp = reflection.Inspector.from_engine(engine)
    tb_names = insp.get_table_names(schema)

    cols = {}
    pks = {}
    for name in tb_names:
        cols[name] = insp.get_columns(table_name=name, schema=schema)
        pks[name] = insp.get_primary_keys(table_name=name, schema=schema)

    for name, col_defs in cols.items():
        frame = pd.DataFrame.from_dict(col_defs)
        cols[name] = frame[["name", "type", "nullable", "default"]]

    for name, pk_cols in pks.items():
        cols[name]["is_pk"] = cols[name]["name"].apply(lambda c: c in pk_cols)
        cols[name]["is_pk"] = cols[name]["is_pk"].astype(bool)
        # Keep True for key columns, blank (None) for the rest.
        cols[name].loc[cols[name]["is_pk"] == False, "is_pk"] = None

    return cols
Esempio n. 11
0
def main():
    """Recompute standardized nv series for all funds, 100 funds per batch."""
    total_fund_ids = get_fund_ids()

    # Process 100 funds per round trip.
    STEP = 100
    batches = [
        total_fund_ids[start:start + STEP]
        for start in range(0, len(total_fund_ids), STEP)
    ]

    for index, fund_ids in enumerate(batches, start=1):
        sql_fund_ids = ",".join("'{}'".format(fid) for fid in fund_ids)

        result = cal_std(sql_fund_ids, True)

        su.tic("Process:{}/{}".format(index, len(batches)))

        # Replace-then-append: clear existing rows before inserting.
        su.tic("Data to DB...month..{}.".format(sql_fund_ids))
        conn.execute(
            "DELETE FROM fund_nv_standard_m where fund_id in ({})".format(
                sql_fund_ids))
        result["m"].to_sql("fund_nv_standard_m",
                           conn,
                           if_exists="append",
                           index=False,
                           chunksize=10000)

        su.tic("Data to DB...week..{}.".format(','.join(fund_ids)))
        conn.execute(
            "DELETE FROM fund_nv_standard_w where fund_id in ({})".format(
                sql_fund_ids))
        result["w"].to_sql("fund_nv_standard_w",
                           conn,
                           if_exists="append",
                           index=False,
                           chunksize=10000)
Esempio n. 12
0

def main():
    """Merge the latest weekly/monthly indicator tables and refresh targets."""
    tb_w = sf.Table("w", "indicator")
    tb_m = sf.Table("m", "indicator")

    engines = cfg.load_engine()
    # Read and write go through the same engine, on separate connections.
    conn_read = engines["2Gb"].connect()
    conn_write = engines["2Gb"].connect()

    tb_names = [
        _tb["re_w"], _tb["ri_w"], _tb["sub_w"], _tb["re_m"], _tb["ri_m"],
        _tb["sub_m"]
    ]
    dfs = fetch_data(gen_sql_indicator_latest(tb_names), conn_read)

    refresh(merge_result(dfs["w"], how="inner"), tb_w, conn_write)
    refresh(merge_result(dfs["m"], how="inner"), tb_m, conn_write)


if __name__ == "__main__":
    # Script entry point: rebuild the merged weekly/monthly indicator tables.
    su.tic("fund_freq_indicator...")
    main()
    su.tic("Done...")
Esempio n. 13
0
        # NOTE(review): fragment of a larger loop body; the enclosing
        # function (and the definitions of year/month) are not visible here.
        result = []
        for mday in range(3, 4):
            print("Day {0}: {1}".format(mday, dt.datetime.now()))
            conn = engine_rd.connect()

            date_s = sf.Time(dt.datetime(year, month, mday) - dt.timedelta(mday))  # Generate statistic_date

            # Funds whose nv was updated on this statistic date (monthly freq).
            sql_ids_updated = sf.SQL.ids_updated_sd(date_s.today, freq="m")
            ids_used = [x[0] for x in engine_rd.execute(sql_ids_updated).fetchall()]

            print("length of ids: {0}".format(len(ids_used)))

            if len(ids_used) == 0: continue  # Check whether there are updated get_data this day

            tic("Getting Data")
            sql_fdate = sf.SQL.foundation_date(ids_used)  # Get their foundation date
            fdate = pd.read_sql(sql_fdate, engine_rd)
            ids_fdate = fdate["fund_id"].tolist()
            t_min = fdate

            if len(ids_fdate) != len(
                    ids_used):  # Check whether all fund has foundation date get_data. If not, use first nv get_data instead.
                ids_diff = list(set(ids_used) - set(ids_fdate))
                sql_fnvdate = sf.SQL.firstnv_date(ids_diff)
                fnvdate = pd.read_sql(sql_fnvdate, engine_rd)
                ids_diff = fnvdate["fund_id"].tolist()
                t_min = pd.merge(fdate, fnvdate, "outer")
                print("Some foundation_date missed, use first nv date...", len(ids_diff), len(ids_fdate), len(t_min))
                t_min = t_min.sort_values("fund_id", ascending=True)  # Sort the df by fund id ASC
Esempio n. 14
0
def calculate(idx):
    """Build the monthly PE index series for index *idx*.

    For every year since the index's first year, nav data is matched against
    a standard monthly calendar, month-on-month mean returns across the
    component funds are chained into index values anchored on a stored base
    value, and the result is returned as a DataFrame (one row per month,
    index metadata columns attached).
    """
    dfs = pd.DataFrame()

    PEIndex = sf.PEIndex(idx)
    first_year = PEIndex.firstyear

    result_r = {}          # year -> ndarray of monthly mean returns
    components_num = {}    # year -> ndarray of component counts per month

    for year in range(first_year, process_date.year + 1):
        # Process up to the current month in the running year, else all 12.
        if year == process_date.timetuple().tm_year:
            month = process_date.month
        else:
            month = 12

        sql_i = sf.SQL_PEIndex(PEIndex.idx, year).yeardata_m

        conn = engine_rd.connect()

        date_s = dt.date(year, month, 1) - dt.timedelta(1)

        su.tic("Getting Data")
        d = pd.read_sql(sql_i, conn)
        conn.close()

        su.tic("Preprocessing...")
        # Dates to unix timestamps; rows are grouped per fund via the index
        # positions of the first row of each fund (idx_slice).
        d["statistic_date"] = d["statistic_date"].apply(
            lambda x: time.mktime(x.timetuple()))
        d_dd = d.drop_duplicates("fund_id")
        idx_slice = d_dd.index.tolist()
        idx_slice.append(len(d))

        t_std = tu.timeseries_std(dt.datetime(year, month, 10),
                                  month,
                                  12,
                                  1,
                                  use_lastday=True)
        t_std1 = t_std[:-1]

        su.tic("Grouping...")
        ds = [
            d[idx_slice[i]:idx_slice[i + 1]]
            for i in range(len(idx_slice) - 1)
        ]
        ts = [x["statistic_date"].tolist() for x in ds]
        navs = [x["nav"].tolist() for x in ds]

        su.tic("Matching...")
        # Three matching strategies merged into one result per fund.
        matchs1 = [tu.outer_match4index_f7(x, t_std1, False) for x in ts]
        matchs2 = [tu.outer_match4index_b7(x, t_std1) for x in ts]
        matchs3 = [tu.outer_match4index_m(x, t_std, False) for x in ts]
        matchs = [
            su.merge_result(x1, x2, x3)
            for x1, x2, x3 in zip(matchs1, matchs2, matchs3)
        ]

        su.tic("Getting Result...")
        t_matchs = [x[0] for x in matchs]
        t_matchs = [tu.tr(x) for x in t_matchs]
        idx_matchs = [x[1] for x in matchs]
        nav_matchs = [[
            navs[i][idx] if idx is not None else None
            for idx in idx_matchs[i].values()
        ] for i in range(len(idx_matchs))]

        su.tic("Calculating Index...")
        # NOTE(review): as_matrix() exists only on older pandas; the rest of
        # this file targets that API as well.
        nvs = pd.DataFrame(nav_matchs).T.astype(float).as_matrix()
        rs = nvs[:-1] / nvs[1:] - 1
        # Discard implausible month-on-month returns before averaging.
        rs[rs > 30] = np.nan
        rs[rs < -1] = np.nan
        r = np.nanmean(rs, axis=1)
        r[np.isnan(r)] = 0

        result_r[year] = r
        components_num[year] = np.sum(~np.isnan(rs), axis=1)
        su.tic("Year:{0}, Done...".format(year))

    # Flatten the per-year arrays (stored newest-first) into chronological
    # order across all years.
    values_r = []
    values_num = []
    for year in range(first_year, process_date.timetuple().tm_year + 1):
        if len(values_r) == 0:
            values_r = result_r[year].tolist()[::-1]
            values_num = components_num[year].tolist()[::-1]
        else:
            values_r.extend(result_r[year].tolist()[::-1])
            values_num.extend(components_num[year].tolist()[::-1])

    # Chain only the last few returns onto a stored base index value.
    adjust_periods = 1
    date_tmp = date_s - relativedelta.relativedelta(months=adjust_periods + 1)
    date_tmp = dt.date(date_tmp.year, date_tmp.month,
                       cld.monthrange(date_tmp.year, date_tmp.month)[1])
    sql_base = "SELECT index_value FROM fund_month_index_static WHERE index_id = '{idx_id}' \
    AND statistic_date = '{sd}'".format(idx_id=PEIndex.id, sd=date_tmp)
    base = pd.read_sql(sql_base, engine_rd).get_value(0, "index_value")

    result = (np.array(values_r)[-(adjust_periods + 1) - 1:] +
              1).cumprod() * base
    result = result.tolist()
    values_num = values_num[-(adjust_periods + 1) - 1:]

    # FIX: the original built dt.datetime(year, month + 1, 10), which raises
    # ValueError when the last processed month is December (month + 1 == 13).
    # Roll the anchor over to January of the next year instead.
    anchor = dt.datetime(year + month // 12, month % 12 + 1, 10)
    tag = tu.timeseries_std(
        anchor,
        tu.periods_in_interval(anchor,
                               dt.datetime(first_year, 1, 10), 12), 12)[::-1]
    tag = [dt.date.fromtimestamp(x - 864000) for x in tag]
    tag = tag[-(adjust_periods + 1) - 1:]

    op = pd.DataFrame(list(zip(tag, result, values_num)))
    op.columns = ["statistic_date", "index_value", "funds_num"]

    # Attach constant index metadata columns.
    cols = [
        "index_id", "index_name", "typestandard_code", "typestandard_name",
        "type_code", "type_name", "stype_code", "stype_name", "index_method",
        "data_source", "data_source_name"
    ]
    values = [
        PEIndex.id, PEIndex.name, PEIndex.typestandard["code"],
        PEIndex.typestandard["name"], PEIndex.type["code"],
        PEIndex.type["name"], PEIndex.stype["code"], PEIndex.stype["name"], 1,
        0, "私募云通"
    ]
    col_dict = dict(zip(cols, values))
    for col, val in col_dict.items():
        op[col] = val

    # Drop the last (incomplete) row before returning.
    dfs = dfs.append(op[:-1])
    return dfs
Esempio n. 15
0
def cal_std(fund_ids=None, whole=False):
    """Standardize nv series (weekly and monthly) for the given funds.

    Args:
        fund_ids: str — comma-joined, quoted fund ids ready to be spliced
            into an SQL IN (...) clause; None means no id filter.
        whole: when True read the whole fund_nv_data_standard table, else
            only rows updated since the module-level *yesterday*.

    Returns:
        dict with keys "w" and "m"; each value is a DataFrame of matched
        nav / added_nav / swanav values on the weekly / monthly standard
        calendar per fund.
    """
    if whole is True:
        sql_navs = "SELECT fund_id, nav, added_nav, swanav, statistic_date FROM fund_nv_data_standard"
    else:
        sql_navs = "SELECT fund_id, nav, added_nav, swanav, statistic_date FROM fund_nv_data_standard \
                    WHERE update_time >= {ut}".format(ut=yesterday)

    if fund_ids:
        if 'WHERE' in sql_navs:
            sql_navs += " AND fund_id in ({})".format(fund_ids)
        else:
            sql_navs += " WHERE fund_id in ({})".format(fund_ids)

    su.tic("Fetching nv Data......")
    df_nav = pd.read_sql(sql_navs, conn)

    # Sanity filters: drop nav outliers and dates outside (1970, today].
    criterias = [(df_nav["nav"] >= 0.2), (df_nav["added_nav"] >= 0.2),
                 (df_nav["statistic_date"] >= dt.date(1970, 1, 2)),
                 (df_nav["statistic_date"] <= dt.date.today())]

    su.tic("Preprocessing......")
    criteria = get_creterias(criterias)
    df_nav = df_nav.loc[criteria].sort_values(["fund_id", "statistic_date"],
                                              ascending=[True, False])
    df_nav.index = range(len(df_nav))
    ids = df_nav["fund_id"].drop_duplicates().tolist()

    # First/last statistic_date per fund, plus unix-timestamp copies.
    t_mins = list(df_nav.groupby("fund_id")["statistic_date"].min())
    t_mins_tstp = [time.mktime(x.timetuple()) for x in t_mins]
    t_maxs = list(df_nav.groupby("fund_id")["statistic_date"].max())
    t_maxs_tstp = [time.mktime(x.timetuple()) for x in t_maxs]

    # Per-fund slices of each nv column (dates sorted descending).
    idx4slice = su.idx4slice(df_nav, slice_by="fund_id")
    navs = su.slice(df_nav, idx4slice, "nav")
    added_navs = su.slice(df_nav, idx4slice, "added_nav")
    swanavs = su.slice(df_nav, idx4slice, "swanav")
    t_reals = su.slice(df_nav, idx4slice, "statistic_date")
    t_reals_tstp = []
    for t_real in t_reals:
        t_reals_tstp.append([time.mktime(x.timetuple()) for x in t_real])

    t_std_alls_w = [
        tu.timeseries_std(friday,
                          tu.periods_in_interval(friday, t_min, 12),
                          extend=4) for t_min in t_mins
    ]  # standard weekly series since inception
    # Trim each standard series to the fund's own [first, last] date window.
    t_std_alls_w = [
        t_std_all[:len([x for x in t_std_all if x >= t_min]) + 1]
        for t_std_all, t_min in zip(t_std_alls_w, t_mins_tstp)
    ]
    t_std_alls_w = [
        t_std_all[-len([x for x in t_std_all if x < t_max]) - 1:]
        for t_std_all, t_max in zip(t_std_alls_w, t_maxs_tstp)
    ]

    t_std_alls_m = [
        tu.timeseries_std(date,
                          tu.periods_in_interval(date, t_min, 12),
                          periods_y=12,
                          use_lastday=True,
                          extend=6) for date, t_min in zip(t_maxs, t_mins)
    ]  # standard monthly series since inception
    t_std_alls_m = [
        t_std_all[:len([x for x in t_std_all if x >= t_min]) + 1]
        for t_std_all, t_min in zip(t_std_alls_m, t_mins_tstp)
    ]

    su.tic("Matching......")
    # Weekly matching: map each standard date to the index of the real nv
    # row used for it (or None), then pull each column through that mapping.
    matchs_w = [
        tu.outer_match4indicator_w(t_real, t_std)
        for t_real, t_std in zip(t_reals_tstp, t_std_alls_w)
    ]
    idx_matchs_w = [x[1] for x in matchs_w]
    nav_matchs_w = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(navs, idx_matchs_w)]
    anav_matchs_w = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(added_navs, idx_matchs_w)]
    swanav_matchs_w = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(swanavs, idx_matchs_w)]
    t_matchs_w = [[
        t_real[ix] if ix is not None else None for ix in idx.values()
    ] for t_real, idx in zip(t_reals, idx_matchs_w)]
    t_matchs_std_w = [
        tu.tr(x[:-1], "date") if x is not None else None for x in t_std_alls_w
    ]

    # Monthly matching: same structure as the weekly pass above.
    matchs_m = [
        tu.outer_match4indicator_m(t_real, t_std)
        for t_real, t_std in zip(t_reals_tstp, t_std_alls_m)
    ]
    idx_matchs_m = [x[1] for x in matchs_m]
    nav_matchs_m = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(navs, idx_matchs_m)]
    anav_matchs_m = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(added_navs, idx_matchs_m)]
    swanav_matchs_m = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(swanavs, idx_matchs_m)]
    t_matchs_m = [[
        t_real[ix] if ix is not None else None for ix in idx.values()
    ] for t_real, idx in zip(t_reals, idx_matchs_m)]
    t_matchs_std_m = [
        tu.tr(x[:-1], "date") if x is not None else None for x in t_std_alls_m
    ]

    result_w = {
        "fund_id": ids,
        "nav": nav_matchs_w,
        "added_nav": anav_matchs_w,
        "swanav": swanav_matchs_w,
        "statistic_date": t_matchs_w,
        "statistic_date_std": t_matchs_std_w
    }

    result_m = {
        "fund_id": ids,
        "nav": nav_matchs_m,
        "added_nav": anav_matchs_m,
        "swanav": swanav_matchs_m,
        "statistic_date": t_matchs_m,
        "statistic_date_std": t_matchs_std_m
    }

    su.tic("Merging Result......")
    result = {}
    result["w"] = pd.DataFrame.from_dict(merge_result(result_w, ids))
    result["m"] = pd.DataFrame.from_dict(merge_result(result_m, ids))

    return result
Esempio n. 16
0
def calculate(idx, export_path=None):
    """Build the weekly PE index series for index *idx*.

    Week-by-week nav data is matched against a standard Monday calendar,
    mean returns across component funds are chained into index values
    (base 1000) and returned as a DataFrame. When *export_path* is given,
    the per-year component nav samples are also exported to an xlsx file.
    """
    dfs = pd.DataFrame()

    PEIndex = sf.PEIndex(idx)
    first_date = PEIndex.firstmonday

    result_r = {}          # year -> ndarray of weekly mean returns
    components_num = {}    # year -> ndarray of component counts per week
    components = {}        # year -> DataFrame of component navs (for export)
    for year in range(first_date.timetuple().tm_year, _process_date.year + 1):

        if year == _process_date.year:
            month = _process_date.month
            day = tu.date_of_weekday(_process_date, 0, (0, 0)).day
            if day > _process_date.day:  # correct the month when this week's Monday falls in the previous month
                month -= 1
        else:
            month = 12
            day = 31

        date_s = dt.date(year, month, day)  # last processed date of this year

        sql_i = sf.SQL_PEIndex(idx, year).yeardata_w["nv"]
        sql_mindate = sf.SQL_PEIndex(idx, year).yeardata_w["t_min"]

        conn = engine_rd.connect()

        su.tic("Getting Data")
        d = pd.read_sql(sql_i, conn)
        d.index = range(len(d))

        t_min = pd.read_sql(sql_mindate,
                            conn)["statistic_date_earliest"].tolist()
        t_min = [time.mktime(x.timetuple()) for x in t_min]  # earliest nv date per fund, as unix timestamp
        conn.close()

        su.tic("Preprocessing...")
        d["statistic_date"] = d["statistic_date"].apply(
            lambda x: time.mktime(x.timetuple()))
        d_dd = d.drop_duplicates("fund_id")
        idx_slice = d_dd.index.tolist()
        idx_slice.append(len(d))
        ids = d_dd["fund_id"].tolist()

        last_monday = date_s - dt.timedelta(
            cld.weekday(date_s.year, date_s.month, date_s.day))  # roll date_s back to its Monday
        t_std = tu.timeseries_std(last_monday, "a", 52, extend=1)  # standard weekly (Monday) calendar
        if year == first_date.timetuple().tm_year:
            t_std = t_std[:-1]

        # Decide, per fund, from which standard week onwards its data is used.
        su.tic("Slicing")
        t_std_long = tu.timeseries_std(
            last_monday,
            tu.periods_in_interval(last_monday, dt.date(year - 1, 11, 30), 12))
        t_std_long_p1m = [(x + relativedelta(months=1)).timestamp()
                          for x in tu.tr(t_std_long)]
        real_p1m = su.compare(t_min, t_std_long)  # each fund's earliest real date vs the standard dates
        p1m_std = su.compare(t_std_long_p1m, t_std)  # one-month-shifted standard dates vs the standard dates
        data_used = [p1m_std[x - 1] for x in real_p1m]

        su.tic("Grouping...")
        ds = [
            d[idx_slice[i]:idx_slice[i + 1]]
            for i in range(len(idx_slice) - 1)
        ]
        ts = [x["statistic_date"].tolist() for x in ds]
        navs = [x["nav"].tolist() for x in ds]

        su.tic("Matching...")
        matchs = [tu.outer_match4index_w(x, t_std, False) for x in ts]

        su.tic("Getting Result...")
        # t_matchs = [x[0] for x in matchs]
        # t_matchs = [tu.tr(x) for x in t_matchs]
        idx_matchs = [x[1] for x in matchs]
        nav_matchs = [[
            navs[i][idx] if idx is not None else None
            for idx in idx_matchs[i].values()
        ] for i in range(len(idx_matchs))]

        su.tic("Calculating Index...")
        nvs = pd.DataFrame(nav_matchs).T.astype(float).as_matrix()
        print(nvs.shape)
        # Blank out weeks before each fund became usable (see data_used).
        for i in range(len(ids)):
            nvs[data_used[i] + 1:, i] = np.nan

        rs = nvs[:-1] / nvs[1:] - 1
        # Discard implausible week-on-week returns before averaging.
        rs[rs > 10] = np.nan
        rs[rs < -1] = np.nan
        r = np.nanmean(rs, axis=1)
        r[np.isnan(r)] = 0

        result_r[year] = r
        components_num[year] = np.sum(~np.isnan(rs), axis=1)

        # log samples
        tmp = pd.DataFrame(nvs, columns=ids).T
        tmp["fund_id"] = tmp.index
        tmp = tmp[[tmp.columns[-1], *tmp.columns[:-1]]]
        components[year] = tmp

        su.tic("Year:{0}, Done...".format(year))
    # Flatten the per-year arrays (stored newest-first) chronologically.
    values_r = []
    values_num = []
    for year in range(first_date.timetuple().tm_year, date_s.year + 1):
        if len(values_r) == 0:
            values_r = result_r[year].tolist()[::-1]
            values_num = components_num[year].tolist()[::-1]
        else:
            values_r.extend(result_r[year].tolist()[::-1])
            values_num.extend(components_num[year].tolist()[::-1])

    # Chain the returns into index values with base 1000.
    result = (np.array(values_r) + 1).cumprod() * 1000
    result = result.tolist()
    result.insert(0, 1000)
    values_num.insert(0, 0)

    tag = tu.timeseries_std(dt.datetime(year, month, day),
                            tu.periods_in_interval(
                                dt.datetime(year, month, day),
                                dt.datetime(first_date.year, 1, 1), 12),
                            52,
                            extend=5)[::-1]
    tag = [x for x in tag if x >= first_date.timestamp()]
    tag = [dt.date.fromtimestamp(x) for x in tag]

    # local debug
    op = pd.DataFrame(list(zip(tag, result, values_num)))
    op.columns = ["statistic_date", "index_value", "funds_num"]

    # Attach constant index metadata columns.
    cols = [
        "index_id", "index_name", "typestandard_code", "typestandard_name",
        "type_code", "type_name", "stype_code", "stype_name", "index_method",
        "data_source", "data_source_name"
    ]
    values = [
        PEIndex.id, PEIndex.name, PEIndex.typestandard["code"],
        PEIndex.typestandard["name"], PEIndex.type["code"],
        PEIndex.type["name"], PEIndex.stype["code"], PEIndex.stype["name"], 1,
        0, "私募云通"
    ]
    col_dict = dict(zip(cols, values))
    for col, val in col_dict.items():
        op[col] = val

    dfs = dfs.append(op)

    if export_path is not None:
        # Re-label each year's component frame with the matching dates.
        tmp = tag.copy()
        for year in sorted(components.keys(), reverse=True):
            # print(year, len(tmp))
            components[year].columns = [
                "fund_id",
                *[tmp.pop() for i in range(len(components[year].columns) - 2)],
                tmp[-1]
            ]

        io.export_to_xl(
            components, "{sd}_{index_name}_w_samples".format(
                sd=last_monday.strftime("%Y%m%d"), index_name=PEIndex.id),
            export_path)

    return dfs
Esempio n. 17
0
def main():
    """Aggregate fund, org, type and latest-nv data into one wide table
    and refresh the fund_info_aggregation target."""
    su.tic("getting original get_data...")
    sql_fi = "SELECT fund_id, fund_name, fund_name_py, fund_full_name, reg_time,  foundation_date, fund_status, region, data_freq, open_date, fund_custodian, \
    fund_time_limit, fund_stockbroker, fee_subscription, fee_redeem, fee_trust, fee_manage, fee_pay, fund_member \
    FROM fund_info"

    d_fi = pd.read_sql(sql_fi, conn_read)

    sql_oi = "SELECT org_id, org_name, org_full_name, profile FROM org_info"
    d_oi = pd.read_sql(sql_oi, conn_read)

    sql_fom = "SELECT fund_id, org_id FROM fund_org_mapping WHERE org_type_code = 1"
    d_fom = pd.read_sql(sql_fom, conn_read)

    sql_ftm = "SELECT fund_id, typestandard_code, typestandard_name, type_code, type_name, stype_code, stype_name FROM fund_type_mapping WHERE typestandard_code IN (601, 602, 603, 604) AND flag=1"
    d_ftm = pd.read_sql(sql_ftm, conn_read)

    sql_ti = "SELECT id as fund_id, foundation_days_range FROM time_index WHERE id_category = 1"
    d_ti = pd.read_sql(sql_ti, conn_read)

    # Latest nv row per fund (self-join on the max statistic_date).
    sql_fnd = "SELECT fnd1.fund_id, fnd2.nav, fnd2.added_nav, fnd2.swanav, fnd2.statistic_date \
    FROM (SELECT fund_id, MAX(statistic_date) as md FROM fund_nv_data_standard GROUP BY fund_id) fnd1 \
    JOIN fund_nv_data_standard fnd2 ON fnd1.fund_id = fnd2.fund_id AND fnd1.md = fnd2.statistic_date"

    d_fnd = pd.read_sql(sql_fnd, conn_read)

    su.tic("merging get_data...")
    # tm: lookup table from any type/stype code to its display name.
    tm = {}
    k_vs = [("typestandard_code", "typestandard_name"),
            ("type_code", "type_name"), ("stype_code", "stype_name")]
    for k_v in k_vs:
        tmp = d_ftm.loc[:, k_v].sort_values(
            by=k_v[0], ascending=[True]).drop_duplicates().dropna()
        tmp = dict(zip(tmp[k_v[0]], tmp[k_v[1]]))
        tm.update(tmp)
    tm.update({np.nan: None})

    # ftm: (fund_id, typestandard_code) -> (type_code, stype_code).
    ftm = dict(
        zip(zip(d_ftm["fund_id"], d_ftm["typestandard_code"]),
            zip(d_ftm["type_code"], d_ftm["stype_code"])))

    d = d_fi.copy()
    d = d.merge(d_fom, how="left", on="fund_id")
    d = d.merge(d_oi, how="left", on="org_id")
    d = d.merge(d_ti, how="left", on="fund_id")
    d = d.merge(d_fnd, how="left", on="fund_id")

    # Maps typestandard ordinal (1..4) to the output column suffix.
    _trans = {1: 1, 2: 4, 3: 2, 4: 3}

    for tsc in range(1, 5):
        tmp = d_ftm.loc[d_ftm["typestandard_code"] == 600 + tsc,
                        ["fund_id", "typestandard_name"]]
        d = d.merge(tmp, how="left", on="fund_id")
    d.columns = list(d.columns[:-4]) + [
        "typestandard_name_{tsc}".format(tsc=_trans[tsc])
        for tsc in range(1, 5)
    ]

    # Per typestandard: resolve type/stype codes and their display names.
    for tsc in range(1, 5):
        d["type_code_{tsc}".format(tsc=_trans[tsc])] = d["fund_id"].apply(
            lambda fid: get_dict_value((fid, 600 + tsc), ftm, 0))
        d["type_code_name_{tsc}".format(
            tsc=_trans[tsc])] = d["type_code_{tsc}".format(
                tsc=_trans[tsc])].apply(lambda tc: get_dict_value(tc, tm))
        d["stype_code_{tsc}".format(tsc=_trans[tsc])] = d["fund_id"].apply(
            lambda fid: get_dict_value((fid, 600 + tsc), ftm, 1))
        d["stype_code_name_{tsc}".format(
            tsc=_trans[tsc])] = d["stype_code_{tsc}".format(
                tsc=_trans[tsc])].apply(lambda tc: get_dict_value(tc, tm))
    d = d.drop_duplicates(subset=["fund_id"])
    d.index = d["fund_id"]
    # Backfill type_code_1/type_code_name_1 from fund_type_mapping rows with
    # typestandard_code = 605 whose type_code is 60502 (equity investment),
    # 60503 (venture capital) or 60504 (other investment funds).
    sql_ts5 = "SELECT fund_id, type_code as type_code_1, type_name as type_code_name_1 FROM fund_type_mapping \
               WHERE type_code IN (60502, 60503, 60504) AND flag = 1"

    df_ts5 = pd.read_sql(sql_ts5, engine_read)
    df_ts5.index = df_ts5["fund_id"]

    d = d.fillna(df_ts5)
    refresh(d, conn_write)
Esempio n. 18
0
        # NOTE(review): fragment of a larger function; the enclosing def and
        # the loop over tsc are not visible in this chunk.
        d["type_code_{tsc}".format(tsc=_trans[tsc])] = d["fund_id"].apply(
            lambda fid: get_dict_value((fid, 600 + tsc), ftm, 0))
        d["type_code_name_{tsc}".format(
            tsc=_trans[tsc])] = d["type_code_{tsc}".format(
                tsc=_trans[tsc])].apply(lambda tc: get_dict_value(tc, tm))
        d["stype_code_{tsc}".format(tsc=_trans[tsc])] = d["fund_id"].apply(
            lambda fid: get_dict_value((fid, 600 + tsc), ftm, 1))
        d["stype_code_name_{tsc}".format(
            tsc=_trans[tsc])] = d["stype_code_{tsc}".format(
                tsc=_trans[tsc])].apply(lambda tc: get_dict_value(tc, tm))
    d = d.drop_duplicates(subset=["fund_id"])
    d.index = d["fund_id"]
    # Backfill type_code_1/type_code_name_1 from fund_type_mapping rows with
    # typestandard_code = 605 whose type_code is 60502 (equity investment),
    # 60503 (venture capital) or 60504 (other investment funds).
    sql_ts5 = "SELECT fund_id, type_code as type_code_1, type_name as type_code_name_1 FROM fund_type_mapping \
               WHERE type_code IN (60502, 60503, 60504) AND flag = 1"

    df_ts5 = pd.read_sql(sql_ts5, engine_read)
    df_ts5.index = df_ts5["fund_id"]

    d = d.fillna(df_ts5)
    refresh(d, conn_write)
    # return d


if __name__ == "__main__":
    # Script entry point for the aggregation job.
    su.tic("fund_info_aggregation...")
    main()
    su.tic("Done...")
Esempio n. 19
0
def calculate():
    """Compute monthly performance indicators for investment advisors (orgs).

    For every org whose funds' NAVs were updated in the processing month,
    builds an equal-weighted pseudo price series from its funds' monthly
    NAVs, aligns it and several benchmark indices onto standard monthly
    grids, and computes indicators (accumulative/annualized return, excess
    return, calmar, sortino, treynor, info, jensen, sharpe) over multiple
    horizons, once per benchmark.

    Returns:
        pd.DataFrame: one row per (org, benchmark) combination, with the
        column names taken from the module-level ``columns``.
    """
    su.tic("fetching get_data...")
    df_whole = pd.DataFrame()
    conn = engine_read.connect()

    year = process_date.year
    month = process_date.month

    # Last calendar day of the processing month.
    month_range = cld.monthrange(year, month)[1]
    time_to_fill = sf.Time(dt.datetime(year, month, month_range))

    sql_bm = sf.SQL.market_index(time_to_fill.today)  # Get benchmark prices
    sql_pe = sf.SQL.pe_index(time_to_fill.today, freq="m")  ###w->m

    su.tic("preprocessing get_data...")
    bm = pd.read_sql(sql_bm, engine_read)
    # Backfill the 1-year treasury rate, then convert it from an annual to a
    # monthly rate; it serves as the risk-free rate below.
    bm["y1_treasury_rate"] = bm["y1_treasury_rate"].fillna(method="backfill")
    bm["y1_treasury_rate"] = bm["y1_treasury_rate"].apply(su.annually2monthly)
    bm["statistic_date"] = bm["statistic_date"].apply(su.date2tstp)
    pe = pd.read_sql(sql_pe, engine_read)
    pe["statistic_date"] = pe["statistic_date"].apply(su.date2tstp)
    conn.close()

    # Benchmark price series: hs300, csi500, sse50, cbi, nfi.
    # NOTE(review): bm["nfi"] is the only entry not converted with .tolist()
    # — presumably an oversight; positional indexing on the Series still
    # works with a default RangeIndex, but confirm.
    prices_bm = [
        bm["hs300"].tolist(), bm["csi500"].tolist(), bm["sse50"].tolist(),
        bm["cbi"].tolist(), bm["nfi"]
    ]
    price_pe = pe["index_value"].tolist()
    r_tbond = bm["y1_treasury_rate"].tolist()
    t_bm = bm["statistic_date"].tolist()
    t_pe = pe["statistic_date"].tolist()

    # Horizon definitions (in months); intervals4/intervals5 select which
    # horizons each indicator family is computed over.
    intervals = table.intervals
    intervals4 = [1, 2, 3, 4, 5, 6, 9, 10, 11]
    intervals5 = [1, 2, 3, 4, 5, 6, 10, 11]

    result = []
    conn = engine_read.connect()

    # Get Data
    date_s = sf.Time(process_date -
                     dt.timedelta(process_date.day))  # Generate statistic_date

    sql_fids_updated = sf.SQL.ids_updated_sd(date_s.today, "om")
    ids_updated = tuple(
        x[0]
        for x in conn.execute(sql_fids_updated).fetchall())  # funds whose NAV was updated this month

    # Advisors (org_type_code = 1) founded at least 3 months ago that manage
    # any of the updated funds.
    sql_o_updated = "SELECT DISTINCT fom.org_id FROM fund_org_mapping fom \
             JOIN org_info oi ON fom.org_id = oi.org_id \
             WHERE org_type_code = 1 AND oi.found_date <= '{0}'  AND fund_id IN {1}".format(
        date_s.today - relativedelta(months=3),
        ids_updated)  # determine which advisors need recalculating from the updated funds
    o_updated = tuple(x[0] for x in conn.execute(sql_o_updated).fetchall())

    sql_fom = "SELECT fom.org_id, fom.fund_id, oi.found_date, oi.org_name FROM fund_org_mapping fom \
               JOIN org_info oi ON fom.org_id = oi.org_id \
               JOIN fund_info fi ON fom.fund_id = fi.fund_id \
               WHERE fom.org_id IN {0} AND fom.org_type_code = 1 AND oi.found_date <= '{1}' AND fi.foundation_date <= '{2}'".format(
        o_updated, date_s.today - relativedelta(months=3),
        date_s.today - relativedelta(months=1))
    fom = pd.read_sql(sql_fom, conn)  # all funds managed by the advisors to be recalculated

    fid_used = tuple(fom["fund_id"])
    sql_fnd = sf.SQL.nav(fid_used)
    fnd = pd.read_sql(sql_fnd, conn)
    fnd = fnd.dropna()
    fnd.index = range(len(fnd))

    # Join org->fund mapping with NAV records; sort NAVs newest-first within
    # each (org, fund) so index 0 is the latest observation.
    data = fom.merge(fnd, how="inner", on="fund_id")
    data = data.sort_values(by=["org_id", "fund_id", "statistic_date"],
                            ascending=[True, True, False])
    t_mins = data.groupby(["org_id"])["statistic_date"].min().tolist()
    t_mins_tstp = [time.mktime(x.timetuple()) for x in t_mins]
    data["statistic_date"] = data["statistic_date"].apply(
        lambda x: time.mktime(x.timetuple()))
    data.index = range(len(data))

    ids_o = data["org_id"].drop_duplicates().tolist()
    names_o = data.drop_duplicates(subset=["org_id"])["org_name"].tolist()
    idx4slice_o = su.idx4slice(data, "org_id")
    # One sub-frame per org, sliced between consecutive start indices.
    # NOTE(review): with `range(len(idx4slice_o) - 1)` the `else` branch is
    # unreachable; this is only correct if su.idx4slice appends an end
    # sentinel (len(data)) — otherwise the last org is dropped. Verify.
    dfs = [
        data[idx4slice_o[i]:idx4slice_o[i + 1]]
        if i != len(idx4slice_o) - 1 else data[idx4slice_o[i]:]
        for i in range(len(idx4slice_o) - 1)
    ]

    # Preprocess
    # Standard monthly timestamp grids, one per horizon.
    t_stds = [
        tu.timeseries_std(date_s.today,
                          interval,
                          periods_y=12,
                          use_lastday=True,
                          extend=1) for interval in intervals
    ]
    t_std_y5 = t_stds[6]
    t_stds_len = [len(x) - 1 for x in t_stds]

    # Per-org standard grid since inception.
    t_std_alls = [
        tu.timeseries_std(date_s.today,
                          tu.periods_in_interval(date_s.today, t_min, 12),
                          periods_y=12,
                          use_lastday=True,
                          extend=6) for t_min in t_mins
    ]  # standard series since inception
    # Trim each grid to one point past the org's earliest NAV date.
    t_std_alls = [
        t_std_all[:len([x for x in t_std_all if x >= t_min]) + 1]
        for t_std_all, t_min in zip(t_std_alls, t_mins_tstp)
    ]

    # Benchmark index grids since inception: match each benchmark's dates
    # onto every org's since-inception grid.
    matchs_bm = [
        tu.outer_match4indicator_m(t_bm, t_std_all, False)
        for t_std_all in t_std_alls
    ]
    idx_matchs_bm = [x[1] for x in matchs_bm]
    price_bm0_all = [[
        prices_bm[0][ix] if ix is not None else None for ix in idx.values()
    ] for idx in idx_matchs_bm]
    price_bm1_all = [[
        prices_bm[1][ix] if ix is not None else None for ix in idx.values()
    ] for idx in idx_matchs_bm]
    price_bm2_all = [[
        prices_bm[2][ix] if ix is not None else None for ix in idx.values()
    ] for idx in idx_matchs_bm]
    price_bm3_all = [[
        prices_bm[3][ix] if ix is not None else None for ix in idx.values()
    ] for idx in idx_matchs_bm]
    price_bm4_all = [[
        prices_bm[4][ix] if ix is not None else None for ix in idx.values()
    ] for idx in idx_matchs_bm]

    matchs_pe = [
        tu.outer_match4indicator_m(t_pe, t_std_all, False)
        for t_std_all in t_std_alls
    ]
    idx_matchs_pe = [x[1] for x in matchs_pe]
    price_pe_all = [[
        price_pe[ix] if ix is not None else None for ix in idx.values()
    ] for idx in idx_matchs_pe]

    # Benchmark return series since inception.
    r_bm0_all = [fi.gen_return_series(x) for x in price_bm0_all]
    r_bm1_all = [fi.gen_return_series(x) for x in price_bm1_all]
    r_bm2_all = [fi.gen_return_series(x) for x in price_bm2_all]
    r_bm3_all = [fi.gen_return_series(x) for x in price_bm3_all]
    r_bm4_all = [fi.gen_return_series(x) for x in price_bm4_all]

    r_pe_all = [fi.gen_return_series(x) for x in price_pe_all]

    # Risk-free rate since inception: use the org with the longest matched
    # grid to build a backfilled fallback series for unmatched dates.
    tmp = [len(idx_matchs_bm[i]) for i in range(len(idx_matchs_bm))]
    tmp_id = tmp.index(max(tmp))
    tmp_list = [
        r_tbond[ix] if ix is not None else None
        for ix in idx_matchs_bm[tmp_id].values()
    ]
    tmp = pd.DataFrame(tmp_list)[0].fillna(method="backfill").tolist()

    r_f_all = [[
        r_tbond[idx[k]] if idx[k] is not None else tmp[k] for k in idx.keys()
    ] for idx in idx_matchs_bm]
    r_f_all = [x[1:] for x in r_f_all]

    # Benchmark returns at the fixed horizons (sliced from the 5-year grid).
    matchs_bm = tu.outer_match4indicator_m(t_bm, t_std_y5,
                                           False)  # benchmark grid since inception
    matchs_pe = tu.outer_match4indicator_m(t_pe, t_std_y5, False)
    idx_matchs_bm = matchs_bm[1]
    idx_matchs_pe = matchs_pe[1]
    price_bm0_y5 = [
        prices_bm[0][ix] if ix is not None else None
        for ix in idx_matchs_bm.values()
    ]
    price_bm1_y5 = [
        prices_bm[1][ix] if ix is not None else None
        for ix in idx_matchs_bm.values()
    ]
    price_bm2_y5 = [
        prices_bm[2][ix] if ix is not None else None
        for ix in idx_matchs_bm.values()
    ]
    price_bm3_y5 = [
        prices_bm[3][ix] if ix is not None else None
        for ix in idx_matchs_bm.values()
    ]
    price_bm4_y5 = [
        prices_bm[4][ix] if ix is not None else None
        for ix in idx_matchs_bm.values()
    ]

    price_pe_y5 = [
        price_pe[ix] if ix is not None else None
        for ix in idx_matchs_pe.values()
    ]

    # Benchmark return series over the 5-year window.
    r_bm0_y5 = fi.gen_return_series(price_bm0_y5)
    r_bm1_y5 = fi.gen_return_series(price_bm1_y5)
    r_bm2_y5 = fi.gen_return_series(price_bm2_y5)
    r_bm3_y5 = fi.gen_return_series(price_bm3_y5)
    r_bm4_y5 = fi.gen_return_series(price_bm4_y5)
    r_pe_y5 = fi.gen_return_series(price_pe_y5)

    r_f_y5 = [
        r_tbond[ix] if ix is not None else None
        for ix in idx_matchs_bm.values()
    ]
    r_f_y5 = r_f_y5[1:]

    # Per-horizon prefixes of each benchmark / risk-free return series.
    rs_bm0 = [r_bm0_y5[:length - 1] for length in t_stds_len]
    rs_bm1 = [r_bm1_y5[:length - 1] for length in t_stds_len]
    rs_bm2 = [r_bm2_y5[:length - 1] for length in t_stds_len]
    rs_bm3 = [r_bm3_y5[:length - 1] for length in t_stds_len]
    rs_bm4 = [r_bm4_y5[:length - 1] for length in t_stds_len]

    rs_pe = [r_pe_y5[:length - 1] for length in t_stds_len]
    rs_f = [r_f_y5[:length - 1] for length in t_stds_len]

    # Benchmark code -> per-horizon return series (and since-inception).
    benchmark = {
        1: rs_bm0,
        2: rs_bm1,
        3: rs_bm2,
        4: rs_pe,
        6: rs_bm3,
        7: rs_bm4
    }
    benchmark_all = {
        1: r_bm0_all,
        2: r_bm1_all,
        3: r_bm2_all,
        4: r_pe_all,
        6: r_bm3_all,
        7: r_bm4_all
    }

    su.tic("calculating ...")
    for i in range(len(ids_o)):
        # NOTE(review): progress message prints i * 200 rather than i — looks
        # like it should be str(i); confirm intended output.
        if i % 200 == 0:
            print("%s done..." % str(i * 200))
        df = dfs[i]
        df.index = range(len(df))
        idx4slice = su.idx4slice(df, "fund_id")
        navs = su.slice(df, idx4slice, "nav")
        t_reals = su.slice(df, idx4slice, "statistic_date")

        # Align each fund's NAVs onto the org's since-inception grid.
        matchs_all = [
            tu.outer_match4indicator_m(t_real, t_std_alls[i], drop_none=False)
            for t_real in t_reals
        ]
        idx_matchs_all = [x[1] for x in matchs_all]
        nav_matchs_all = [[
            nav[ix] if ix is not None else np.NaN for ix in idx.values()
        ] for nav, idx in zip(navs, idx_matchs_all)]

        # Equal-weight the funds' monthly returns into one org-level return,
        # then chain it into a pseudo price series (newest first).
        nv_matrix = np.array(nav_matchs_all).T
        r_total = np.nanmean((nv_matrix[:-1] / nv_matrix[1:] - 1), axis=1)
        price_total = np.nancumprod(1 + r_total[::-1])[::-1].tolist()
        price_total.append(1)  # define the base-period pseudo price as 1
        r_total = fi.gen_return_series(price_total)

        # Price prefixes per horizon; None when the org is younger than the
        # horizon (first 7 horizons only).
        prices = []
        for j in range(7):
            if t_mins[i] + relativedelta(months=intervals[j]) <= date_s.today:
                length = min(len(price_total), t_stds_len[j])
                prices.append(price_total[:length])
            else:
                prices.append(None)

        for j in range(7, 11):
            length = min(len(price_total), t_stds_len[j])
            prices.append(price_total[:length])

        prices.append(price_total)
        # Comprehension-local `i` below is scoped to the comprehension in
        # Python 3 and does not clobber the outer loop index.
        navs1 = [prices[i] for i in intervals4]
        navs2 = [prices[i] for i in intervals5]
        rs1 = [fi.gen_return_series(x) for x in navs1]
        rs2 = [fi.gen_return_series(x) for x in navs2]

        # Copy before appending so the shared rs_f is not mutated per org.
        rs_f_ = rs_f.copy()
        rs_f_.append(r_f_all[i])
        rs_f1_ = [rs_f_[i] for i in intervals4]
        rs_f2_ = [rs_f_[i] for i in intervals5]

        # Benchmark-independent indicators.
        ir = [fi.accumulative_return(r) for r in navs1[:-1]]
        ir.append(
            fi.accumulative_return([x for x in navs1[-1] if x is not None]))  #
        ir_a = [fi.return_a(r, 12) for r in rs1]
        calmar = [fi.calmar_a(nv, r_f, 12) for nv, r_f in zip(navs2, rs_f2_)]
        sortino = [fi.sortino_a(r, r_f, 12) for r, r_f in zip(rs2, rs_f2_)]

        for k in benchmark.keys():
            rs_bm_ = benchmark[k].copy()  # selected benchmark
            rs_bm_.append(benchmark_all[k][i])
            rs_bm1 = [rs_bm_[i] for i in intervals4]
            rs_bm2 = [rs_bm_[i] for i in intervals5]

            # Benchmark-relative indicators.
            ier_a = [
                fi.excess_return_a(r, r_bm, 12)
                for r, r_bm in zip(rs1, rs_bm1)
            ]
            info = [fi.info_a(r, r_bm, 12) for r, r_bm in zip(rs2, rs_bm2)]
            jensen = [
                fi.jensen_a(r, r_bm, r_f, 12)
                for r, r_bm, r_f in zip(rs2, rs_bm2, rs_f2_)
            ]
            treynor = [
                fi.treynor_a(r, r_bm, r_f, 12)
                for r, r_bm, r_f in zip(rs2, rs_bm2, rs_f2_)
            ]
            sharpe_a = [fi.sharpe_a(r, r_f, 12) for r, r_f in zip(rs2, rs_f2_)]

            # One output row: org metadata + flattened indicator values.
            tmp = [
                ir, ir_a, ier_a, calmar, sortino, treynor, info, jensen,
                sharpe_a
            ]
            result_i = [
                ids_o[i], names_o[i], k, 1, 1, nv_matrix.shape[1], 60001,
                "全产品", 6000101, "全产品", date_s.today
            ]
            for x in tmp:
                result_i.extend(x)
            result.append(result_i)

    # Columns 11..85 hold the numeric indicators; cast and round to 6 dp.
    df = pd.DataFrame(result)
    df[list(range(11, 78 + 8))] = df[list(range(11,
                                                78 + 8))].astype(np.float64)
    df[list(range(11, 78 + 8))] = df[list(range(
        11, 78 + 8))].apply(lambda x: round(x, 6))
    df.columns = columns
    # NOTE(review): DataFrame.append was removed in pandas 2.0; this file
    # evidently targets an older pandas.
    df_whole = df_whole.append(df)

    return df_whole
Esempio n. 20
0
def main():
    """Compute the time_index series for both modes and persist them."""
    tic("Calculating time_index...")
    frames = [calculate(mode) for mode in (1, 2)]
    # Upsert each result frame into the time_index table.
    for frame in frames:
        io.to_sql("time_index", engine_wt, frame, "update")
    tic("Done...")
Esempio n. 21
0
File: swanav.py Progetto: dxcv/fund
from utils.database import config as cfg, io
from utils.script import swanv
from utils.script.scriptutils import tic

# Database engines: both the read and write handles point at the same
# "2Gb" engine from the shared configuration.
engines = cfg.load_engine()
engine_rd = engines["2Gb"]
engine_wt = engines["2Gb"]


def main():
    """Normalize swanav in fund_nv_data_standard, then recompute and persist it.

    Steps:
      1. Clear swanav values that were wrongly set equal to nav while
         nav differs from added_nav.
      2. Seed swanav = nav for the trivial case nav == added_nav.
      3. Recompute the remaining swanav values and upsert them.
    """
    conn = engine_wt.connect()
    try:
        # Undo bad propagation: swanav must not equal nav when nav and
        # added_nav disagree.
        sql_check = "UPDATE fund_nv_data_standard SET swanav = NULL WHERE nav <> added_nav AND swanav = nav"
        conn.execute(sql_check)

        # When nav == added_nav the share-weighted NAV is trivially nav.
        sql_update = "UPDATE fund_nv_data_standard SET swanav = nav WHERE nav = added_nav"
        conn.execute(sql_update)
    finally:
        # Always release the connection — the original leaked it when an
        # UPDATE raised.
        conn.close()

    df = swanv.calculate_swanav()
    if len(df) > 0:
        df.index = range(len(df))
        print("{num} records to update".format(num=len(df)))
        io.to_sql("fund_nv_data_standard", engine_wt, df, "update")


# Script entry point: log start, run the swanav recomputation, log completion.
if __name__ == "__main__":
    tic("swanav...")
    main()
    tic("Done...")
Esempio n. 22
0
def calculate(idx, export_path=None):
    """Build the monthly PE index series identified by *idx*.

    For every year since the index's first year, fetches the member funds'
    monthly NAVs, aligns them onto a standard monthly grid, averages the
    member returns into an equal-weighted index return, and chains the
    yearly pieces into an index level based at 1000.

    Args:
        idx: PE index identifier understood by ``sf.PEIndex``.
        export_path: optional export directory; when given, the per-year
            component NAV matrices are also written to an Excel workbook.

    Returns:
        pd.DataFrame: index values with statistic_date, index_value,
        funds_num and index metadata columns (the final, incomplete period
        is dropped).
    """
    dfs = pd.DataFrame()

    PEIndex = sf.PEIndex(idx)
    first_year = PEIndex.firstyear

    # Per-year accumulators: index returns, component counts, NAV matrices.
    result_r = {}
    components_num = {}
    components = {}

    for year in range(first_year, process_date.year + 1):
        # The current year only runs through the processing month.
        if year == process_date.timetuple().tm_year:
            month = process_date.month
        else:
            month = 12

        sql_i = sf.SQL_PEIndex(PEIndex.idx, year).yeardata_m

        conn = engine_rd.connect()

        su.tic("Getting Data")
        d = pd.read_sql(sql_i, conn)
        conn.close()

        su.tic("Preprocessing...")
        d["statistic_date"] = d["statistic_date"].apply(
            lambda x: time.mktime(x.timetuple()))
        # Slice boundaries: first row of each fund, plus a final sentinel.
        d_dd = d.drop_duplicates("fund_id")
        idx_slice = d_dd.index.tolist()
        idx_slice.append(len(d))
        ids = d_dd["fund_id"].tolist()

        t_std = tu.timeseries_std(dt.datetime(year, month, 10),
                                  month,
                                  12,
                                  1,
                                  use_lastday=True)
        t_std1 = t_std[:-1]

        su.tic("Grouping...")
        ds = [
            d[idx_slice[i]:idx_slice[i + 1]]
            for i in range(len(idx_slice) - 1)
        ]
        ts = [x["statistic_date"].tolist() for x in ds]
        navs = [x["nav"].tolist() for x in ds]

        su.tic("Matching...")
        # Combine forward-7-day, backward-7-day and plain monthly matching.
        matchs1 = [tu.outer_match4index_f7(x, t_std1, False) for x in ts]
        matchs2 = [tu.outer_match4index_b7(x, t_std1) for x in ts]
        matchs3 = [tu.outer_match4index_m(x, t_std, False) for x in ts]
        matchs = [
            su.merge_result(x1, x2, x3)
            for x1, x2, x3 in zip(matchs1, matchs2, matchs3)
        ]

        su.tic("Getting Result...")
        t_matchs = [x[0] for x in matchs]
        t_matchs = [tu.tr(x) for x in t_matchs]
        idx_matchs = [x[1] for x in matchs]
        nav_matchs = [[
            navs[i][idx] if idx is not None else None
            for idx in idx_matchs[i].values()
        ] for i in range(len(idx_matchs))]

        su.tic("Calculating Index...")
        # NOTE(review): DataFrame.as_matrix() was removed in pandas 1.0;
        # .values / .to_numpy() is the modern equivalent — this code targets
        # an older pandas.
        nvs = pd.DataFrame(nav_matchs).T.astype(float).as_matrix()
        rs = nvs[:-1] / nvs[1:] - 1
        # Discard implausible monthly returns (> 3000% or a total loss).
        rs[rs > 30] = np.nan
        rs[rs < -1] = np.nan
        # Equal-weighted mean return across components; empty months -> 0.
        r = np.nanmean(rs, axis=1)
        r[np.isnan(r)] = 0

        result_r[year] = r
        components_num[year] = np.sum(~np.isnan(rs), axis=1)

        # log samples
        tmp = pd.DataFrame(nvs, columns=ids).T
        tmp["fund_id"] = tmp.index
        tmp = tmp[[tmp.columns[-1], *tmp.columns[:-1]]]
        components[year] = tmp

        su.tic("Year:{0}, Done...".format(year))

    # Chain the yearly pieces oldest-first into one return sequence.
    values_r = []
    values_num = []
    for year in range(first_year, process_date.timetuple().tm_year + 1):
        if len(values_r) == 0:
            values_r = result_r[year].tolist()[::-1]
            values_num = components_num[year].tolist()[::-1]
        else:
            values_r.extend(result_r[year].tolist()[::-1])
            values_num.extend(components_num[year].tolist()[::-1])

    # Cumulate returns into an index level based at 1000.
    result = (np.array(values_r) + 1).cumprod() * 1000
    result = result.tolist()
    result.insert(0, 1000)
    values_num.insert(0, 0)

    # tag = tu.timeseries_std(dt.datetime(year, month + 1, 10),
    #                         tu.periods_in_interval(dt.datetime(year, month + 1, 10), dt.datetime(first_year, 1, 10),
    #                                                12), 12)[::-1]
    # NOTE(review): dt.datetime(year, month + 1, 10) raises ValueError when
    # the processing month is December (month + 1 == 13) — confirm whether
    # this script is simply never run in that month.
    tag = tu.timeseries_std(
        dt.datetime(year, month + 1, 10),
        tu.periods_in_interval(dt.datetime(year, month + 1, 10),
                               dt.datetime(first_year, 1, 10), 12), 12)[::-1]
    # Shift each timestamp back 10 days (864000 s) to land in the period.
    tag = [dt.date.fromtimestamp(x - 864000) for x in tag]

    op = pd.DataFrame(list(zip(tag, result, values_num)))
    op.columns = ["statistic_date", "index_value", "funds_num"]

    # Attach the index's static metadata columns.
    cols = [
        "index_id", "index_name", "typestandard_code", "typestandard_name",
        "type_code", "type_name", "stype_code", "stype_name", "index_method",
        "data_source", "data_source_name"
    ]
    values = [
        PEIndex.id, PEIndex.name, PEIndex.typestandard["code"],
        PEIndex.typestandard["name"], PEIndex.type["code"],
        PEIndex.type["name"], PEIndex.stype["code"], PEIndex.stype["name"], 1,
        0, "私募云通"
    ]
    col_dict = dict(zip(cols, values))
    for col, val in col_dict.items():
        op[col] = val

    # Drop the last, incomplete period.
    dfs = dfs.append(op[:-1])

    if export_path is not None:
        # Re-label each year's component matrix with its period dates and
        # export all years to one workbook.
        tmp = tag.copy()
        for year in sorted(components.keys(), reverse=True):
            print(year, len(tmp))
            components[year].columns = [
                "fund_id",
                *[tmp.pop() for i in range(len(components[year].columns) - 2)],
                tmp[-1]
            ]
        io.export_to_xl(
            components,
            "{sd}_{index_name}_m_samples".format(sd=tag[-2].strftime("%Y%m%d"),
                                                 index_name=PEIndex.id),
            export_path)

    return dfs
Esempio n. 23
0
            if fue >= 1:
                df_p2.ix[idx].ix[:fue, "added_nav"] = df_p2.ix[idx].ix[:fue,
                                                                       "nav"]
            else:
                continue
        else:
            print(idx)
            continue

    result = df_p1.append(df_p2).sort_values(by=["fund_id", "statistic_date"],
                                             ascending=[True, True])
    ids_error = set(result.loc[
        (result.nav == result.added_nav) & (result.nav != result.swanav1) &
        (~np.isnan(result.swanav1))].fund_id.drop_duplicates().tolist())

    result = result.loc[result.fund_id.apply(lambda x: x not in ids_error)]
    result["data_source"] = 2
    result.index = range(len(result))
    io.to_sql("fund_nv_data_source",
              engine_wt,
              result[["fund_id", "statistic_date", "data_source",
                      "added_nav"]],
              type="update")
    return result


# Script entry point: log start, run the nv_data_source update, log completion.
if __name__ == "__main__":
    tic("nv_data_source...")
    main()
    tic("Done...")
Esempio n. 24
0
        result = cal_std(sql_fund_ids, True)

        su.tic("Process:{}/{}".format(index, len(sliced)))
        index += 1

        su.tic("Data to DB...month..{}.".format(sql_fund_ids))
        conn.execute(
            "DELETE FROM fund_nv_standard_m where fund_id in ({})".format(
                sql_fund_ids))
        result["m"].to_sql("fund_nv_standard_m",
                           conn,
                           if_exists="append",
                           index=False,
                           chunksize=10000)

        su.tic("Data to DB...week..{}.".format(','.join(fund_ids)))
        conn.execute(
            "DELETE FROM fund_nv_standard_w where fund_id in ({})".format(
                sql_fund_ids))
        result["w"].to_sql("fund_nv_standard_w",
                           conn,
                           if_exists="append",
                           index=False,
                           chunksize=10000)


# Script entry point: log start, run the NAV standardization, log completion.
if __name__ == "__main__":
    su.tic("nv2std...")
    main()
    su.tic("Done...")