def _write_weekly_frames(frames):
    """Write every ``(table, dataframe)`` pair through ``io.to_sql`` on ``engine_rd``."""
    for table, frame in frames:
        io.to_sql(table, conn=engine_rd, dataframe=frame, chunksize=5000)


def calculate(time_s, time_e):
    """
    Compute the weekly fund indicators for every task date and store them.

    Tasks are obtained from ``pre.generate_tasks(time_s, time_e, freq="w", ...)``
    as a mapping ``statistic_date -> fund ids``; dates before 2015-01-01 are
    dropped and the remaining dates are processed in ascending order. For each
    date the return, risk and subsidiary function groups are evaluated for
    every fund and the rows are written to ``fund_weekly_return``,
    ``fund_weekly_risk`` and ``fund_subsidiary_weekly_index``.

    :param time_s: first bound handed to ``pre.generate_tasks``
        (presumably the start of the update window -- confirm with that helper)
    :param time_e: second bound handed to ``pre.generate_tasks``; also used as
        the "UPDATE TIME" label in the progress output
    :return: None
    """
    try:
        tasks = pre.generate_tasks(time_s, time_e, freq="w", processes=7, conn=engine_rd)
        tasks = {k: v for k, v in tasks.items() if k >= dt.date(2015, 1, 1)}
        print(time_e, len(tasks))
    except ValueError as e:
        print(time_e, e)
        # No task map could be built: fall back to an empty one so the loop
        # below is skipped instead of failing with NameError on ``tasks``.
        tasks = {}

    # (target table, function group) pairs, in the order they are written.
    groups = (
        ("fund_weekly_return", _funcs_return),
        ("fund_weekly_risk", _funcs_risk),
        ("fund_subsidiary_weekly_index", _funcs_sub),
    )

    for statistic_date, ids_used in sorted(tasks.items(), key=lambda item: item[0]):
        print("UPDATE TIME:{ut}: STATISTIC DATE:{sd}, LENGTH:{l}".format(
            ut=time_e, sd=statistic_date, l=len(ids_used)))

        data = pre.ProcessedData(statistic_date, list(ids_used), _freq)
        bms = {
            index_name: cal.Benchmark(attr_dict, index_name)
            for index_name, attr_dict in data.index.items()
        }
        tbond = cal.Tbond(data.index["y1_treasury_rate"], "y1_treasury_rate")

        # Accumulate the rows of all funds per target table; remember the
        # sorted function names reported by the calculation for the headers.
        rows = {table: [] for table, _ in groups}
        cols_sorted = {}
        for _, attrs in data.funds.items():
            fund = cal.Fund(attrs)
            for table, funcs in groups:
                res, cols_sorted[table] = cal.calculate(
                    funcs, _intervals, _bms_used, _freq, statistic_date,
                    fund, bms, tbond, with_func_names=True)
                rows[table].extend(res)

        # Build all frames before writing anything. Tables without rows are
        # skipped: there is nothing to store, and assigning the header to an
        # empty frame (or reading ``cols_sorted`` when no fund was processed)
        # would raise.
        frames = []
        for table, _ in groups:
            if not rows[table]:
                continue
            df = pd.DataFrame(rows[table])
            df.columns = cal.format_cols(
                cols_sorted[table], _freq,
                prefix=["fund_id", "fund_name", "statistic_date", "benchmark"])
            frames.append((table, df))

        try:
            _write_weekly_frames(frames)
        except Exception as e:
            # One retry after a pause; all tables are written again, and a
            # second failure propagates to the caller.
            print("WRITE FAILED, RETRYING IN 10s: {e}".format(e=e))
            time.sleep(10)
            _write_weekly_frames(frames)

    print("TASK DONE: {ut}".format(ut=time_e))
def cal_by_date(statistic_date, fund_ids):
    """
    Compute the daily indicators of the given funds for one date and store them.

    The three function groups ``_funcs_1``, ``_funcs_2`` and ``_funcs_3`` are
    evaluated for every fund loaded by ``pre.ProcessedData``; the resulting
    rows are written to ``fund_daily_return``, ``fund_daily_risk`` and
    ``fund_daily_subsidiary`` through ``engine_wt``. Rows that are entirely
    empty are dropped, and a table that ends up without rows is not written.

    :param statistic_date: datetime.date
    :param fund_ids: list
    :return: None
    """
    print("STATISTIC_DATE:{sd}, LENGTH:{l}".format(sd=statistic_date, l=len(fund_ids)))

    data = pre.ProcessedData(statistic_date, fund_ids, _freq, pe=[], conn=engine_rd,
                             conn_mkt=engine_mkt, weekday=True)
    bms = {
        index_name: cal.Benchmark(attr_dict, index_name)
        for index_name, attr_dict in data.index.items()
    }
    tbond = cal.Tbond(data.index["y1_treasury_rate"], "y1_treasury_rate")

    result_1, result_2, result_3 = [], [], []
    # Stay ``None`` when no fund is processed; they are only read for tables
    # that actually have rows.
    funcs_1_sorted = funcs_2_sorted = funcs_3_sorted = None
    for _, attrs in data.funds.items():
        fund = cal.Fund(attrs)
        res_1, funcs_1_sorted = cal.calculate(_funcs_1, _intervals, _bms_used, _freq, statistic_date,
                                              fund, bms, tbond, with_func_names=True)
        res_2, funcs_2_sorted = cal.calculate(_funcs_2, _intervals, _bms_used, _freq, statistic_date,
                                              fund, bms, tbond, with_func_names=True)
        res_3, funcs_3_sorted = cal.calculate(_funcs_3, _intervals, _bms_used, _freq, statistic_date,
                                              fund, bms, tbond, with_func_names=True)
        result_1.extend(res_1)
        result_2.extend(res_2)
        result_3.extend(res_3)

    outputs = (
        ("fund_daily_return", result_1, funcs_1_sorted),
        ("fund_daily_risk", result_2, funcs_2_sorted),
        ("fund_daily_subsidiary", result_3, funcs_3_sorted),
    )

    # Prepare every frame before the first write so that a formatting error
    # cannot leave the tables partially updated.
    frames = []
    for table, rows, funcs_sorted in outputs:
        df = pd.DataFrame(rows)
        # Drop rows that are entirely empty.
        df.dropna(how='all', inplace=True)
        if len(df) == 0:
            # Nothing to store; also avoids formatting a header from
            # ``funcs_sorted`` when no fund was processed at all.
            continue
        df.columns = cal.format_cols_mutual(
            funcs_sorted, _freq, prefix=["fund_id", "fund_name", "statistic_date", "benchmark"])
        frames.append((table, df))

    for table, df in frames:
        io.to_sql(table, conn=engine_wt, dataframe=df, chunksize=500)
result_risk = [] result_sub = [] data = pre.ProcessedData(statistic_date, list(ids_used), _freq, pe=[], conn=engine_rd, conn_mkt=engine_mkt) bms = { index_name: cal.Benchmark(attr_dict, index_name) for index_name, attr_dict in data.index.items() } tbond = cal.Tbond(data.index["y1_treasury_rate"], "y1_treasury_rate") for fid, attrs in data.funds.items(): if fid != "000127": continue fund = cal.Fund(attrs) res_return, _funcs_return_sorted = cal.calculate( _funcs_return, _intervals, _bms_used, _freq, statistic_date, fund, bms, tbond, with_func_names=True) res_risk, _funcs_risk_sorted = cal.calculate(_funcs_risk, _intervals, _bms_used, _freq, statistic_date,
def calculate(statistic_date_with_ids_used):
    """
    Evaluate the org indicators for one task and write them fund by fund.

    Args:
        statistic_date_with_ids_used: pair ``(statistic_date, ids_used)``;
            the ids are sorted before being handed to ``pre.ProcessedData``.

    Returns:
        None. Rows go to ``org_monthly_return``, ``org_monthly_risk`` and
        ``org_monthly_research`` via ``io.to_sql`` on ``engine_rd``.
    """
    statistic_date, ids_used = statistic_date_with_ids_used
    data = pre.ProcessedData(statistic_date, sorted(ids_used), _freq)

    bms = {}
    for index_name, attr_dict in data.index.items():
        bms[index_name] = cal.Benchmark(attr_dict, index_name)
    tbond = cal.Tbond(data.index["y1_treasury_rate"], "y1_treasury_rate")

    tables = ("org_monthly_return", "org_monthly_risk", "org_monthly_research")

    for _, attrs in data.funds.items():
        fund = cal.Fund(attrs)

        # One (rows, sorted function names) pair per function group, in
        # return / risk / subsidiary order.
        computed = []
        for funcs in (_funcs_return, _funcs_risk, _funcs_sub):
            computed.append(cal.calculate(
                funcs, _intervals, _bms_used, _freq, statistic_date,
                fund, bms, tbond, with_func_names=True))

        frames = [pd.DataFrame(list(rows)) for rows, _ in computed]
        headers = [
            cal.format_cols_org4r(
                names, _freq, prefix=["org_id", "org_name", "statistic_date", "benchmark"])
            for _, names in computed
        ]

        for frame, header in zip(frames, headers):
            frame.columns = header
        for frame in frames:
            frame["index_id"] = sf.SQL.Org4R.INDEXID

        # Each fund is stored as soon as its frames are complete.
        for table, frame in zip(tables, frames):
            io.to_sql(table, conn=engine_rd, dataframe=frame, chunksize=5000)
def calculate(statistic_date):
    """
    Evaluate the org indicators for one statistic date and write them fund by fund.

    Args:
        statistic_date: date to compute; it is printed, then used to look up
            the ids through ``pre.fetch_fids_used`` and to load the data.

    Returns:
        None. Rows go to ``org_monthly_return``, ``org_monthly_risk`` and
        ``org_monthly_research`` via ``io.to_sql`` on ``engine_rd``.
    """
    print(statistic_date)
    ids_used = pre.fetch_fids_used(statistic_date=statistic_date, freq=_freq, conn=engine_rd)

    # TMP EXEC (disabled): restrict the run to a fixed set of ids.
    # tmp = set(['P1001447', 'P1008404', 'P1003197', 'P1000902', 'P1004813', 'P1004746', 'P1001198',
    #            'P1001203', 'P1004150', 'P1014451', 'P1008451', 'P1008160', 'P1000277', 'P1028421'])
    # ids_used = list(set(ids_used).intersection(tmp))

    data = pre.ProcessedData(statistic_date, ids_used, _freq)
    bms = dict(
        (index_name, cal.Benchmark(attr_dict, index_name))
        for index_name, attr_dict in data.index.items()
    )
    tbond = cal.Tbond(data.index["y1_treasury_rate"], "y1_treasury_rate")

    def evaluate(funcs, fund):
        # Shared call shape for the three function groups.
        return cal.calculate(funcs, _intervals, _bms_used, _freq, statistic_date,
                             fund, bms, tbond, with_func_names=True)

    def header(names_sorted):
        # Column names for one output table.
        return cal.format_cols_org4r(
            names_sorted, _freq, prefix=["org_id", "org_name", "statistic_date", "benchmark"])

    for _, attrs in data.funds.items():
        fund = cal.Fund(attrs)

        rows_return, names_return = evaluate(_funcs_return, fund)
        rows_risk, names_risk = evaluate(_funcs_risk, fund)
        rows_sub, names_sub = evaluate(_funcs_sub, fund)

        df_return = pd.DataFrame(list(rows_return))
        df_risk = pd.DataFrame(list(rows_risk))
        df_sub = pd.DataFrame(list(rows_sub))

        header_return = header(names_return)
        header_risk = header(names_risk)
        header_sub = header(names_sub)

        df_return.columns = header_return
        df_risk.columns = header_risk
        df_sub.columns = header_sub

        for frame in (df_return, df_risk, df_sub):
            frame["index_id"] = sf.SQL.Org4R.INDEXID

        # Each fund is stored as soon as its frames are complete.
        io.to_sql("org_monthly_return", conn=engine_rd, dataframe=df_return, chunksize=5000)
        io.to_sql("org_monthly_risk", conn=engine_rd, dataframe=df_risk, chunksize=5000)
        io.to_sql("org_monthly_research", conn=engine_rd, dataframe=df_sub, chunksize=5000)