def update_industry_to_json(industry, trading_days):
    """
    Reload one industry classification and store it as a JSON file.

    The data is loaded for the last entry of ``trading_days`` with the loader
    registered for ``industry`` in INDEX_LOADER_MAP, written to
    ``<DB_INDUSTRY>/<industry>.json`` (the previous file, if any, is first
    copied to ``<industry>_backup.json``), and finally the 'industry' schema
    entry is refreshed.

    @industry (str): key of INDEX_LOADER_MAP; also the base name of the json file
    @trading_days (sequence): trading days; only the last element is used

    Raises:
        ValueError: if loading, writing, or the schema update fails. The
            failure is logged before the exception is raised.
    """
    # Bind `date` before the try block: if trading_days is empty, [-1] raises
    # before the assignment and the handler below must still be able to format
    # its message instead of failing with UnboundLocalError.
    date = None
    try:
        date = trading_days[-1]
        index_code, loader = INDEX_LOADER_MAP[industry]
        info = loader(index_code, date, level=1)
    except Exception:
        message = "Error occurred when loading {} on {}".format(industry, date)
        Logger.error(message)
        raise ValueError(message)

    try:
        path = os.path.join(DB_INDUSTRY, '{}.json'.format(industry))
        copy_to = os.path.join(DB_INDUSTRY, '{}_backup.json'.format(industry))
        # Keep a copy in case the data gets corrupted. On the very first
        # update there is nothing to back up yet, so skip the copy then.
        if os.path.exists(path):
            shutil.copy(path, copy_to)
        dict2json(info, path, log=False)
        Logger.info("{} on {} is updated successfully".format(industry, date))
    except Exception:
        message = "Error occurred when writing {} on {}".format(industry, date)
        Logger.error(message)
        raise ValueError(message)

    # json files are different from sql, cannot use update_schema()
    # therefore update schema information explicitly
    try:
        now = datetime.now()
        schema = get_schema('industry')
        entry = schema[industry]
        entry["begin date"] = ""
        entry["end date"] = now.strftime('%Y-%m-%d')
        entry['last update'] = now.strftime('%Y-%m-%d %H:%M:%S')
        save_schema(schema, 'industry')
        Logger.info("schema updated: {}".format(industry))
    except Exception:
        message = "Error occurred when updating schema of {}".format(industry)
        Logger.error(message)
        traceback.print_exc()
        raise ValueError(message)
def update_schema(db_name, sub_name):
    """
    Refresh 'begin date', 'end date' and 'last update' of one schema entry.

    Intended for databases other than factor_return. The updated schema is
    written back to the db's 'schema' file and also exported as 'schema.csv'
    in the same directory.

    @db_name (str): name of the db, e.g. FACTOR (factor_return excluded)
    @sub_name (str): name of the sub-database inside the db, e.g. VALUE GROWTH
    """
    db_dir = DB_PATH_LIB[db_name]
    schema_file = os.path.join(db_dir, 'schema')
    schema = json2dict(schema_file)
    assert sub_name

    # First and last date found in the table delimit the covered range.
    dates = get_date_lists_in_table(db_dir, sub_name)
    first_date = dates[0]
    entry = schema[sub_name]
    entry['begin date'] = first_date
    entry['end date'] = dates[-1]
    entry['last update'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    Logger.info("schema updated: {}".format(sub_name))
    dict2json(schema, schema_file, log=False)

    # Tabular export: one row per schema entry, fixed column order.
    ordered_columns = [
        'aspect', 'type', 'begin date', 'end date', 'last update',
        'col_names', 'field', 'kwargs', 'explanation'
    ]
    table = pd.DataFrame(schema).T
    table = table.reindex(columns=ordered_columns)
    table = table.reset_index()
    table = table.rename(columns={'index': 'indicator'})
    table = table.sort_values(['type', 'aspect', 'field'])
    table.to_csv(os.path.join(db_dir, 'schema.csv'), index=False)
def backtest_for_mfs(yearmonth, selected_sec_ids, benchmark, model_name,
                     label_kind="label_A", override=False):
    """
    Backtest framework for multi-factor stock selection (one month per call).

    Computes the CLOSE-weighted 'month_ret' of the selected securities and the
    'month_ret' of the benchmark for ``yearmonth``, then stores both in
    ``records_<yearmonth>.json`` under
    ``<DB_PATH>/backtest/<model_name>/<label_kind>/<benchmark>``.

    @yearmonth <"%Y-%m">: year and month
    @selected_sec_ids <list>: sec_ids selected for yearmonth
    @benchmark <str>: benchmark whose return is compared against; currently
                      supported: hs300 zz500 wind_ALL_A
    @model_name <str>: name of the model
    @label_kind <str>: how the labels were obtained
    @override <bool>: when False and the record file already exists, return
                      without recomputing or rewriting it

    Returns None; the result is only written to disk.
    """
    # Pass the path components separately so os.path.join inserts the
    # separator of the current platform (hard-coded backslashes only form
    # nested directories on Windows).
    back_test_path = os.path.join(DB_PATH, "backtest", str(model_name),
                                  str(label_kind), str(benchmark))
    file_path = os.path.join(back_test_path,
                             "records_{}.json".format(yearmonth))
    if not os.path.exists(back_test_path):
        os.makedirs(back_test_path)
    if os.path.exists(file_path) and (not override):
        return

    records = {}
    # Load the monthly return and price data of all securities.
    df_rr_monthly = pd.read_csv(
        os.path.join(DB_PATH, "datasets", "pre", "rr_monthly.csv"))
    df_rr_monthly_1 = df_rr_monthly.set_index(['sec_id', 'yearmonth'])
    df_rr_monthly_1 = df_rr_monthly_1.sort_index(level='sec_id')

    # Rows of the selected securities for the requested month.
    perform = df_rr_monthly_1.loc[(selected_sec_ids, yearmonth),
                                  ['CLOSE', 'month_ret']]
    # Portfolio return: month_ret weighted by CLOSE.
    model_ret = ((perform['CLOSE'] * perform['month_ret']).sum()
                 / perform['CLOSE'].sum())

    # The benchmark is stored in the same table, keyed by its sec_id.
    benchmark_rr = df_rr_monthly[df_rr_monthly['sec_id'] == benchmark]
    benchmark_rr = benchmark_rr.set_index(['yearmonth'])
    benchmark_ret = benchmark_rr.loc[yearmonth, 'month_ret']

    records[yearmonth] = {
        "model": model_ret,
        "benchmark": benchmark_ret,
        "selected_sec_ids": ",".join(selected_sec_ids)
    }
    dk.dict2json(records, file_path)
def update_factor_return_schema(factor):
    """
    Refresh 'begin date', 'end date' and 'last update' of a factor_return
    schema entry.

    The date range is taken from the "date" column of ``<factor>.csv`` in the
    factor_return directory.

    @factor (str): name of the factor
    """
    root = DB_PATH_LIB['factor_return']
    schema_file = os.path.join(root, 'schema')
    schema = json2dict(schema_file)

    csv_file = os.path.join(root, "{}.csv".format(factor))
    dates = pd.read_csv(csv_file, encoding="utf-8")["date"]

    earliest = dates.min()
    entry = schema[factor]
    entry['begin date'] = earliest
    entry['end date'] = dates.max()
    entry['last update'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    Logger.info("schema updated: {}".format(factor))
    dict2json(schema, schema_file, log=False)
def save_schema(schema, db_name):
    """
    Write a schema dict to the schema file registered for a db.

    @schema (dict): schema content to store
    @db_name (str): key of SCHEMA_PATHS selecting the target file
    """
    target_path = SCHEMA_PATHS[db_name]
    dict2json(schema, target_path, log=False)