Esempio n. 1
0
def calculate(time_s, time_e):
    """Compute weekly fund indicators for one update window and store them.

    Requests the tasks for the window ``time_s``..``time_e`` from
    ``pre.generate_tasks`` (weekly frequency), drops statistic dates before
    2015-01-01 and then, for each remaining statistic date in ascending
    order, runs the return, risk and subsidiary function sets for every fund
    and writes the three resulting tables through ``io.to_sql``.

    Args:
        time_s: Start of the update window, passed to ``pre.generate_tasks``.
        time_e: End of the update window; also used as the label in the
            progress output.

    Returns:
        None. Results are written to the database; progress is printed.

    Raises:
        Exception: Whatever ``io.to_sql`` raises when a table write fails a
            second time, after one retry.
    """
    try:
        tasks = pre.generate_tasks(time_s, time_e, freq="w", processes=7, conn=engine_rd)
    except ValueError as e:
        # No task mapping could be built, so there is nothing to process.
        # Returning here avoids iterating an unbound ``tasks`` below.
        print(time_e, e)
        return

    tasks = {k: v for k, v in tasks.items() if k >= dt.date(2015, 1, 1)}
    print(time_e, len(tasks))

    # Identifier columns placed in front of the indicator columns of each table.
    id_cols = ["fund_id", "fund_name", "statistic_date", "benchmark"]

    for statistic_date, ids_used in sorted(tasks.items(), key=lambda x: x[0]):
        print("UPDATE TIME:{ut}: STATISTIC DATE:{sd}, LENGTH:{l}".format(ut=time_e, sd=statistic_date, l=len(ids_used)))
        result_return = []
        result_risk = []
        result_sub = []
        # Stay None until at least one fund has been calculated.
        cols_return_sorted = cols_risk_sorted = cols_sub_sorted = None

        data = pre.ProcessedData(statistic_date, list(ids_used), _freq)
        bms = {index_name: cal.Benchmark(attr_dict, index_name) for index_name, attr_dict in data.index.items()}
        tbond = cal.Tbond(data.index["y1_treasury_rate"], "y1_treasury_rate")
        for _fid, attrs in data.funds.items():
            fund = cal.Fund(attrs)
            res_return, cols_return_sorted = cal.calculate(_funcs_return, _intervals, _bms_used, _freq, statistic_date, fund, bms, tbond, with_func_names=True)
            res_risk, cols_risk_sorted = cal.calculate(_funcs_risk, _intervals, _bms_used, _freq, statistic_date, fund, bms, tbond, with_func_names=True)
            res_sub, cols_sub_sorted = cal.calculate(_funcs_sub, _intervals, _bms_used, _freq, statistic_date, fund, bms, tbond, with_func_names=True)
            result_return.extend(res_return)
            result_risk.extend(res_risk)
            result_sub.extend(res_sub)

        if cols_return_sorted is None:
            # No fund was processed for this date: the column orders are
            # unknown and there are no rows to store, so skip the date.
            print("UPDATE TIME:{ut}: STATISTIC DATE:{sd}, NO FUNDS, SKIPPED".format(ut=time_e, sd=statistic_date))
            continue

        df_return = pd.DataFrame(result_return)
        df_risk = pd.DataFrame(result_risk)
        df_sub = pd.DataFrame(result_sub)

        # Each call gets its own copy of the prefix list, as in the original
        # code where every call received a fresh list literal.
        df_return.columns = cal.format_cols(cols_return_sorted, _freq, prefix=list(id_cols))
        df_risk.columns = cal.format_cols(cols_risk_sorted, _freq, prefix=list(id_cols))
        df_sub.columns = cal.format_cols(cols_sub_sorted, _freq, prefix=list(id_cols))

        writes = (
            ("fund_weekly_return", df_return),
            ("fund_weekly_risk", df_risk),
            ("fund_subsidiary_weekly_index", df_sub),
        )
        for table, frame in writes:
            try:
                io.to_sql(table, conn=engine_rd, dataframe=frame, chunksize=5000)
            except Exception as e:
                # Retry only the table that failed, once, after a short
                # pause; tables already written are not written again.
                print("WRITE FAILED, RETRYING: {t}: {e}".format(t=table, e=e))
                time.sleep(10)
                io.to_sql(table, conn=engine_rd, dataframe=frame, chunksize=5000)

    print("TASK DONE: {ut}".format(ut=time_e))
Esempio n. 2
0
def generate_tasks(update_time_l, update_time_r):
    """Return the weekly tasks for an update window, from 2015-01-01 onwards.

    Calls ``pre.generate_tasks`` for the window
    ``update_time_l``..``update_time_r`` and keeps only the entries whose key
    is on or after 2015-01-01. The window and either the number of tasks or
    the error are printed.

    Args:
        update_time_l: Left (start) bound of the update window.
        update_time_r: Right (end) bound of the update window.

    Returns:
        dict: The filtered mapping returned by ``pre.generate_tasks``; an
        empty dict when ``pre.generate_tasks`` raises ``ValueError``.
    """
    window = "{l}->{r}".format(l=update_time_l, r=update_time_r)
    try:
        tasks = pre.generate_tasks(update_time_l,
                                   update_time_r,
                                   freq="w",
                                   processes=7,
                                   conn=engine_rd)
    except ValueError as e:
        print(window, e)
        # Previously ``tasks`` stayed unbound here and the final ``return``
        # raised UnboundLocalError; an empty mapping means "nothing to do".
        return {}

    tasks = {k: v for k, v in tasks.items() if k >= dt.date(2015, 1, 1)}
    print(window, len(tasks))
    return tasks
Esempio n. 3
0
    "ability_security",
    "tracking_error_a",
    "p_earning_periods",
    "n_earning_periods",
    "min_return",
    "max_return",
    "skewness",
    "kurtosis",
]

# Benchmark identifiers used by this script.
# NOTE(review): presumably handed to cal.calculate further down - confirm.
_bms_used = ["hs300", "csi500", "sse50", "cbi", "nfi"]
# Single-element list, so the body runs once for UPDATE_TIME; the loop form
# lets the error branch below skip the window with `continue`.
for update_time in [UPDATE_TIME]:
    try:
        # Window: from 35 hours 5 minutes before update_time up to update_time.
        tasks = pre.generate_tasks(update_time -
                                   relativedelta(hours=35, minutes=5),
                                   update_time,
                                   freq=_freq,
                                   processes=7,
                                   conn=engine_rd)
        # Keep only entries whose key is on or after 2015-01-01.
        tasks = {k: v for k, v in tasks.items() if k >= dt.date(2015, 1, 1)}
        print(update_time, len(tasks))

    except ValueError as e:
        # Report the failure and skip this update time entirely.
        print(update_time, e)
        continue
    # Walk the tasks in ascending key (statistic date) order.
    for statistic_date, ids_used in sorted(tasks.items(), key=lambda x: x[0]):
        # NOTE(review): every statistic date except 2017-09-21 is skipped;
        # this looks like a leftover debugging filter - confirm it is intended.
        if statistic_date != dt.date(2017, 9, 21): continue
        print("UPDATE TIME:{ut}: STATISTIC DATE:{sd}, LENGTH:{l}".format(
            ut=update_time, sd=statistic_date, l=len(ids_used)))
        # Fresh accumulators for this statistic date.
        result_return = []
        result_risk = []
        result_sub = []
Esempio n. 4
0
    prefix=["fund_id", "fund_name", "statistic_date", "benchmark"])
# Formatted column list for the risk function set, with four identifier
# columns as prefix.
# NOTE(review): presumably used as the risk table's DataFrame columns - confirm.
cols_risk = cal.format_cols(
    _funcs_risk,
    _freq,
    prefix=["fund_id", "fund_name", "statistic_date", "benchmark"])
# Same for the subsidiary function set.
cols_sub = cal.format_cols(
    _funcs_sub,
    _freq,
    prefix=["fund_id", "fund_name", "statistic_date", "benchmark"])

# Benchmark identifiers used by this script.
# NOTE(review): presumably handed to cal.calculate further down - confirm.
_bms_used = ["hs300", "csi500", "sse50", "cbi", "strategy", "FI01", "nfi"]

# Build the task mapping for the month that `last_month` falls in.
# The window starts `last_month.day - 1` days before `last_month` (the first
# day of that month) and ends 86399 seconds (23:59:59) after `last_month`.
# NOTE(review): assumes `last_month` is a datetime at midnight - confirm.
try:
    tasks = pre.generate_tasks(last_month - dt.timedelta(last_month.day - 1),
                               last_month + dt.timedelta(0, 86399),
                               freq=_freq,
                               processes=7,
                               conn=engine_rd)
    # Keep statistic dates from 2015-01-01 up to, but not including, the
    # first day of UPDATE_TIME's month.
    tasks = {
        k: v
        for k, v in tasks.items()
        if (k >= dt.date(2015, 1, 1)
            and k < dt.date(UPDATE_TIME.year, UPDATE_TIME.month, 1))
    }
    print(UPDATE_TIME, len(tasks))

except ValueError as e:
    print(UPDATE_TIME, e)
    # Without this fallback `tasks` would be unbound and the loop below
    # would raise NameError; an empty mapping makes the loop a no-op.
    tasks = {}
# Process the statistic dates in ascending order.
for statistic_date, ids_used in sorted(tasks.items(), key=lambda x: x[0]):
    print("UPDATE TIME:{ut}: STATISTIC DATE:{sd}, LENGTH:{l}".format(
        ut=UPDATE_TIME, sd=statistic_date, l=len(ids_used)))