Beispiel #1
0
def cs_bert_workflow():
    """Assemble the masked cross-section BERT training workflow and save it.

    Three ``FinancialStatementCSMaskedTFDatasetStep`` instances (train,
    validation, evaluate) are fed from a single ``ChnEquityInputStep``. They
    are combined with a ``TSBertForMaskedCS`` model step, a compile step and
    the recommended fit/callback steps into one ``TFTrainStep``. The train
    step's init-value mapping is then written to a ``.yml`` file in the
    directory of the ``workflow_cfg`` sample module, and the path is printed.

    Returns:
        The assembled ``TFTrainStep``.
    """
    splitter = ChnEquityInputStep(train_val_split_ratio=0.9)

    # Train and validation use the same repeating batch-of-50 pipeline;
    # the evaluation pipeline is not repeated and uses batches of 10.
    fit_pipeline = "lambda ds: ds.repeat().batch(50)"
    train_dataset = FinancialStatementCSMaskedTFDatasetStep(
        ds_pip=fit_pipeline, _input_steps=[(splitter, "train")])
    validation_dataset = FinancialStatementCSMaskedTFDatasetStep(
        ds_pip=fit_pipeline, _input_steps=[(splitter, "validation")])
    evaluation_dataset = FinancialStatementCSMaskedTFDatasetStep(
        ds_pip="lambda ds: ds.batch(10)", _input_steps=[(splitter, "evaluate")])

    steps = [
        (train_dataset, "train_ds"),
        (validation_dataset, "val_ds"),
        (evaluation_dataset, "test_ds"),
    ]

    bert_cls_path = cls_to_str(TSBertForMaskedCS)
    bert_hp = TSBertForMaskedCS.HP(
        name=TSBertName.CHN_EQUITY_FINANCIAL_STATEMENT_CONST_MASK,
        hidden_size=FinancialStatementCSBertConst.FIN_STATEMENT_INDICATORS_COUNT,
        num_attention_heads=12,
    )
    steps.append(TFModelStep(model_cls_str=bert_cls_path, model_hp=bert_hp))
    steps.append(get_compile_step(loss="ts_bert_mae", metrics=["ts_bert_mae", "ts_bert_mse"]))
    steps.extend(get_recommend_fit_with_callback_steps(epochs=5, steps_per_epoch=700, validation_steps=70))

    trainer = TFTrainStep(_input_steps=steps)

    # Persist the result as a standard workflow definition.
    from gs_research_workflow.samples import workflow_cfg

    cfg_dir = os.path.dirname(workflow_cfg.__file__)
    workflow_file = os.path.join(cfg_dir, f"{TSBertForMaskedCS.__name__}_workflow_v1.yml")
    print(workflow_file)
    save_mapping_to_file_or_stream(workflow_file, trainer.get_init_value_dict(True), None)
    return trainer
def get_default_inception_model_classification_task_step(nb_classes: int) -> TFModelStep:
    """Return the default ``InceptionTimeForClassification`` model step.

    Args:
        nb_classes: Passed through as ``nb_classes`` to
            ``InceptionTimeForClassification.HP``.

    Returns:
        A ``TFModelStep`` whose hyper-parameters use an inception-time block
        with ``depth=6`` and ``use_residual=True``, built on an inception
        block with ``stride=1`` and ``use_bottleneck=True``.
    """
    model_cls_path = cls_to_str(InceptionTimeForClassification)

    # Build the nested hyper-parameters from the innermost block outwards.
    block_hp = InceptionBlock.HP(stride=1, use_bottleneck=True)
    inception_time_hp = InceptionTimeBlock.HP(depth=6, use_residual=True,
                                              inception_block_hp=block_hp)
    classifier_hp = InceptionTimeForClassification.HP(nb_classes=nb_classes,
                                                      inception_time_hp=inception_time_hp)
    return TFModelStep(model_cls_str=model_cls_path, model_hp=classifier_hp)
Beispiel #3
0
    def cs_financial_statement_model_evaluate():
        """Print how far the masked-CS BERT prediction is from the reported values.

        For the hard-coded symbol ``600315.SH`` this takes the last 20 rows of
        the z-scored financial statement data, runs them through a pre-saved
        ``TSBertForMaskedCS`` model, maps the prediction back to original
        values with ``de_zscore_to_val`` and prints the per-column relative
        difference (true minus predicted, divided by true) of the last row,
        with NaNs dropped and sorted ascending.
        """
        from gs_research_workflow.time_series.models.ts_bert import TSBertForMaskedCS
        from gs_research_workflow.time_series.gs_steps.model_steps import TFModelStep

        # Show every column and every row; limit displayed cell width to 80.
        for option_name, option_value in (('display.max_columns', None),
                                          ('display.max_rows', None),
                                          ('max_colwidth', 80)):
            pd.set_option(option_name, option_value)

        # Disabled dataset sanity check kept from the original:
        # stks = ChnEquityInputStep()
        # tf_ds_step = FinancialStatementCSMaskedTFDatasetStep(df_equities=stks.train_items,
        #                                                      ds_pip="lambda ds: ds.repeat().batch(20)")
        # tf_ds_step._ds_generator_call()
        # for ele in tf_ds_step.tf_ds.take(10):
        #     print(ele)
        #     y = model(ele[0])
        #     loss = gs_mean_absolute_error(ele[1], y)
        #     print(loss)
        ticker = "600315.SH"
        ts_api = TuShareProData(use_l3_cache=True)

        zscores = equity_all_financial_statement_zscore(ts_api, ticker)
        company_type = equity_comp_type(ts_api, ticker)

        zscore_window = zscores.iloc[-20:][:]
        true_window = equity_all_financial_statement_by_enddate(ts_api, ticker)[-20:][:]
        input_ids, position_id, token_id, attention_mask_id = \
            FinancialStatementCSMaskedTFDatasetStep.df_to_model_input(
                zscore_window, company_type, False, True, False)

        # Load the model. Alternative hyper-parameters tried earlier:
        # TSBertForMaskedCS.HP(hidden_size=276, num_attention_heads=6, num_hidden_layers=10)
        model_step = TFModelStep(
            model_cls_str=cls_to_str(TSBertForMaskedCS),
            model_hp=TSBertForMaskedCS.HP(hidden_size=276, num_attention_heads=12),
        )
        bert = TSBertForMaskedCS.from_pre_saved(model_step.check_point_path)

        # Add a leading batch axis to each model input.
        batched_inputs = tuple(
            tensor[tf.newaxis, :]
            for tensor in (input_ids, position_id, token_id, attention_mask_id)
        )
        outputs = bert(batched_inputs)

        prediction = outputs[0].numpy()[0]  # drop the batch dimension
        # Drop the COMP_TYPE dimension and the padded date values (per the original note).
        column_count = zscore_window.shape[1]
        prediction = prediction[1:, 0:column_count]
        predicted_zscores = pd.DataFrame(data=prediction,
                                         index=zscore_window.index,
                                         columns=zscore_window.columns)

        # Convert the z-scored prediction back to original values.
        df_mean, df_std = equity_all_financial_statement_mean_and_std(ts_api, ticker)
        predicted_values = de_zscore_to_val(predicted_zscores, df_mean, df_std)
        # predicted_values = (zscore_window/zscore_window) * predicted_values

        true_last = true_window.iloc[-1]
        absolute_gap = true_last - predicted_values.iloc[-1]
        relative_gap = absolute_gap / true_last

        # print(f"y_true:{true_last}")
        # print(f"y_pred:{predicted_values.iloc[-1]}")
        # print(f"delta_v:{absolute_gap}")
        print(f"delta_percentage:{relative_gap.dropna().sort_values(ascending=True)}")
def get_default_inception_with_attention_model_weight_prediction_task_step() -> TFModelStep:
    """Return the default ``InceptionTimeWithAttentionForWeightPrediction`` model step.

    Returns:
        A ``TFModelStep`` whose hyper-parameters use an attention-enabled
        inception-time block (``depth=6``, ``use_residual=True``, all three
        ``use_attention_*`` flags set to ``True``) built on an inception block
        with ``stride=1`` and ``use_bottleneck=True``.
    """
    model_cls_path = cls_to_str(InceptionTimeWithAttentionForWeightPrediction)

    # Build the nested hyper-parameters from the innermost block outwards.
    block_hp = InceptionBlock.HP(stride=1, use_bottleneck=True)
    attention_block_hp = InceptionTimeWithAttentionBlock.HP(
        depth=6,
        use_residual=True,
        use_attention_at_input=True,
        use_attention_at_each_inception=True,
        use_attention_after_residual=True,
        inception_block_hp=block_hp,
    )
    weight_hp = InceptionTimeWithAttentionForWeightPrediction.HP(
        inception_attention_hp=attention_block_hp)
    return TFModelStep(model_cls_str=model_cls_path, model_hp=weight_hp)
Beispiel #5
0
def cs_bert_equity_daily_workflow():
    """Assemble the daily equity-pool BERT training workflow and save it.

    An ``IByTGeneratorStep`` (configured with nine named pe / pb / total_mv
    query conditions) feeds three ``EquityPoolTSDatasetStep`` instances for
    train, validation and evaluate. Those are combined with a
    ``TSBertForMaskedCS`` model step, a compile step and the recommended
    fit/callback steps into one ``TFTrainStep``. The train step's init-value
    mapping is then written to a ``.yml`` file in the directory of the
    ``workflow_cfg`` sample module, and the path is printed.

    Returns:
        The assembled ``TFTrainStep``.
    """
    window_start = date(2019, 1, 1)
    window_end = date(2019, 12, 31)

    # (name, query expression) pairs handed to the generator step.
    pool_conditions = [
        ("low_pe", "pe > 3.0 and pe < 8.0"),
        ("mid_pe", "pe > 15.0 and pe < 30.0"),
        ("high_pe", "pe > 30.0 and pe < 80.0"),
        ("low_pb", "pb >= 0.6 and pb <= 0.8"),
        ("mid_pb", "pb >= 0.9 and pb <= 1.1"),
        ("high_pb", "pb >= 1.3 and pb <= 1.8"),
        ("sml_cap", "total_mv >= 5.0e5 and total_mv < 5.0e6"),
        ("mid_cap", "total_mv >= 8.0e6 and total_mv < 2.0e7"),
        ("large_cap", "total_mv >= 2.0e7"),
    ]
    pool_step = IByTGeneratorStep(
        start_t=window_start,
        end_t=window_end - timedelta(days=92),
        sample_freq="2w",
        train_val_split_ratio=0.95,
        evaluate_items_count=1,
        use_concept_blocks=False,
        ls_i_by_condition=pool_conditions,
    )

    # Direct (non-workflow) construction kept for reference:
    # EquityPoolTSDatasetStep(df_i_by_t=pool_step.pool_by_t, i_start_t=window_start, i_end_t=window_end,
    #                         ds_pip="lambda ds: ds.repeat().batch(8)")

    def _pool_dataset(pipeline, split):
        # One dataset step per split; all share the date window and the source step.
        return EquityPoolTSDatasetStep(ds_pip=pipeline,
                                       i_start_t=window_start, i_end_t=window_end,
                                       _input_steps=[(pool_step, split)])

    train_dataset = _pool_dataset("lambda ds: ds.repeat().batch(5)", "train")
    validation_dataset = _pool_dataset("lambda ds: ds.repeat().batch(5)", "validation")
    evaluation_dataset = _pool_dataset("lambda ds: ds.batch(3)", "evaluate")

    steps = [
        (train_dataset, "train_ds"),
        (validation_dataset, "val_ds"),
        (evaluation_dataset, "test_ds"),
    ]

    model_name = TSBertName.CHN_EQUITY_DAILY_PREDICT_RETURN_LESS_INDICATORS
    bert_cls_path = cls_to_str(TSBertForMaskedCS)
    bert_hp = TSBertForMaskedCS.HP(
        name=model_name,
        hidden_size=EquityPoolTSDatasetStep.MAX_INDICATORS,
        # One extra slot, used as the padding 0 (per the original note).
        max_position_embeddings=EquityPoolTSDatasetStep.LOOK_PERIOD_ITEMS + 1,
        type_vocab_size=EquityPoolTSDatasetStep.MAX_ENTITIES_PER_INST + 1,
        num_attention_heads=12,
    )
    steps.append(TFModelStep(model_cls_str=bert_cls_path, model_hp=bert_hp))
    steps.append(get_compile_step(loss="mae_align_to_y_true",
                                  metrics=["mae_align_to_y_true", "mse_align_to_y_true"]))

    # Other fit settings left commented out in the original:
    #   epochs=2, steps_per_epoch=10000, validation_steps=150
    #   epochs=1, steps_per_epoch=14000, validation_steps=200
    #   epochs=2, steps_per_epoch=20, validation_steps=4
    steps.extend(get_recommend_fit_with_callback_steps(epochs=3, steps_per_epoch=5000, validation_steps=200))

    trainer = TFTrainStep(_input_steps=steps)

    # Persist the result as a standard workflow definition.
    from gs_research_workflow.samples import workflow_cfg

    cfg_dir = os.path.dirname(workflow_cfg.__file__)
    workflow_file = os.path.join(cfg_dir, f"{model_name}_workflow_v1.yml")
    print(workflow_file)
    save_mapping_to_file_or_stream(workflow_file, trainer.get_init_value_dict(True), None)
    return trainer
Beispiel #6
0
def for_notebook_eval_cs_financial_statement_mask():
    """Notebook helper: evaluate a pre-saved masked-CS BERT model on one stock.

    Locates the checkpoint directory for the configured ``TSBertForMaskedCS``
    hyper-parameters, loads the model, runs it on the last 20 rows of the
    z-scored financial statement data of ``symbol`` and prints a summary
    frame holding the true value, predicted value, absolute difference and
    percentage difference for the last row.

    Raises:
        RuntimeError: If the checkpoint directory or the ``tf_model.h5``
            weight file inside it does not exist.

    NOTE(review): ``sys``, ``delta_v``, ``delta_percentage`` and
    ``df_pred_zscore`` are not referenced in the visible part of this
    function; the function may continue beyond what is shown here.
    """
    from gs_research_workflow.time_series.models.ts_bert import TSBertForMaskedCS, TSBertName
    from gs_research_workflow.time_series.gs_steps.model_steps import TFModelStep
    from gs_research_workflow.common.serialization_utilities import cls_to_str
    from gs_research_workflow.time_series.data.utilities import de_zscore_to_val
    from gs_research_workflow.common.path_utilities import _DATA_ROOT
    import os
    import sys

    # ANSI escape sequence printed in front of error messages to highlight them.
    PRINT_HIGHLIGHT_STYLE = "\033[1;37;41m"
    #  ---------- For a different setup, only the parameters in this section need to change  ---------
    model_hp = TFModelStep(
        model_cls_str=cls_to_str(TSBertForMaskedCS),
        model_hp=TSBertForMaskedCS.HP(
            name=TSBertName.CHN_EQUITY_FINANCIAL_STATEMENT,
            hidden_size=276,
            num_attention_heads=12)
    )  # only num_attention_heads in [6, 12] and num_hidden_layers in [8, 12, 16, 20] may be changed here
    # ---------------------------------------------------------

    checkpoint_path = os.path.join(
        _DATA_ROOT, "ModelData", model_hp.model_cls.__name__,
        model_hp.model_init_hp.get_hash_str(
        ))  # do not call TFModelStep.check_point_path() here: it would create the directory
    if not os.path.isdir(checkpoint_path):
        print(
            PRINT_HIGHLIGHT_STYLE,
            f"model path '{checkpoint_path}' is not existed! please check the model hyper-parameters"
        )
        raise RuntimeError(
            f"model path '{checkpoint_path}' is not existed! please check the model hyper-parameters"
        )
    checkpoint_file = os.path.join(checkpoint_path, "tf_model.h5")
    if not os.path.exists(checkpoint_file):
        print(PRINT_HIGHLIGHT_STYLE,
              f"model weight file '{checkpoint_file}' is not existed")
        raise RuntimeError(
            f"model weight file '{checkpoint_file}' is not existed")
    model = TSBertForMaskedCS.from_pre_saved(checkpoint_path)

    # -------------------------------------------------

    # To switch stocks without changing the model, only this cell needs editing
    symbol = "600315.SH"  # stock to predict

    # -------------------------------------------------

    # This part needs no changes; just re-run it after changing the parameters above
    # Prepare the data to display

    import pandas as pd
    from gs_research_workflow.time_series.data.tushare_wrapper import TuShareProData
    from gs_research_workflow.time_series.data.predefined_equity_apis import equity_all_financial_statement_zscore, \
        equity_comp_type, equity_all_financial_statement_mean_and_std, equity_all_financial_statement_by_enddate
    from gs_research_workflow.time_series.gs_steps.tf_ds_for_financial_statement import \
        FinancialStatementCSMaskedTFDatasetStep
    import tensorflow as tf

    pd.set_option('display.max_columns', None)  # show all columns
    pd.set_option('display.max_rows', None)  # show all rows
    pd.set_option('max_colwidth', 80)

    tushare = TuShareProData(use_l3_cache=True)

    df_zscore, series_mean, series_std = equity_all_financial_statement_zscore(
        tushare, symbol, ret_mean_and_std=True)
    comp_type = equity_comp_type(tushare, symbol)

    df_y_for_pred = df_zscore.iloc[-20:][:]  # for now only the last period of already-published data is predicted
    df_y_true_original = equity_all_financial_statement_by_enddate(
        tushare, symbol)[-20:][:]
    input_ids, position_id, token_id, attention_mask_id = FinancialStatementCSMaskedTFDatasetStep.df_to_model_input(
        df_y_for_pred, comp_type, series_std * 100., False, True, False)

    y_pred = model((input_ids[tf.newaxis, :], position_id[tf.newaxis, :],
                    token_id[tf.newaxis, :],
                    attention_mask_id[tf.newaxis, :]))  # add batch axis
    np_y_pred = y_pred[0].numpy()[0]  # drop the batch dimension
    np_y_pred = np_y_pred[
        1:, 0:df_y_for_pred.shape[1]]  # drop the COMP_TYPE dimension and the padded date values
    df_y_pred = pd.DataFrame(data=np_y_pred,
                             index=df_y_for_pred.index,
                             columns=df_y_for_pred.columns)

    # de-z-score back to original values
    df_mean, df_std = equity_all_financial_statement_mean_and_std(
        tushare, symbol)
    df_y_pred_orig_val = de_zscore_to_val(df_y_pred, df_mean, df_std)

    # Differences for the last row only (true minus predicted, absolute and relative).
    delta_v = df_y_true_original.iloc[-1] - df_y_pred_orig_val.iloc[-1]
    delta_percentage = (
        df_y_true_original.iloc[-1] -
        df_y_pred_orig_val.iloc[-1]) / df_y_true_original.iloc[-1]

    # Summary in original units; rows with a NaN in either column are dropped.
    df_pred_summary = pd.DataFrame({
        "true_val": df_y_true_original.iloc[-1],
        "pred_val": df_y_pred_orig_val.iloc[-1]
    }).dropna()
    df_pred_summary[
        "delta_v"] = df_pred_summary["true_val"] - df_pred_summary["pred_val"]
    # Relative difference expressed in percent (multiplied by 100).
    df_pred_summary["delta_percentage"] = (df_pred_summary["true_val"] - df_pred_summary["pred_val"]) * 100. / \
                                          df_pred_summary["true_val"]

    # Same true/predicted pairing, but in z-score space.
    df_pred_zscore = pd.DataFrame({
        "true_val": df_zscore.iloc[-1],
        "pred_val": df_y_pred.iloc[-1]
    }).dropna()

    print(df_pred_summary)
                                   symbols="000001.SH",
                                   cols=["close"])
    x_ts_data_step = SymbolTSStep(api="equity_backward_adjust_daily",
                                  cols=[
                                      "open", "high", "low", "close",
                                      "pre_close", "change", "pct_chg", "vol",
                                      "amount"
                                  ])

    train_val_tf_ds_step = DELTSCategoryMultiPeriodDatasetStep(_input_steps=[
        train_val_set, (time_align_step,
                        "time_align"), (x_ts_data_step, "x_data_callable")
    ])
    train_ds_step = FuncStrStep(func_body="lambda ds: ds.repeat().batch(10)")
    val_ds_step = FuncStrStep(func_body="lambda ds: ds.repeat().batch(10)")
    model_step = TFModelStep(model_cls_str=cls_to_str(InceptionTime),
                             model_hp=InceptionTime.HP(nb_classes=3))
    compile_step = CompileStep(loss="categorical_crossentropy",
                               optimizer="Adam",
                               metrics=['accuracy'])
    fit_step = FitStep(epochs=10, steps_per_epoch=4500, validation_steps=110)
    checkpoint_step = ModelCheckPointStep(save_best_only=True, verbose=1)
    tensor_board_step = TensorBoardStep(write_graph=False)

    train_step = TFTrainStep(_input_steps=[
        train_val_tf_ds_step, (train_ds_step,
                               "train_pip_line"), (val_ds_step,
                                                   "val_pip_line"), model_step,
        compile_step, fit_step, checkpoint_step, tensor_board_step
    ])

    # train_step.fit()
def get_default_ts_bert_for_weight_prediction_task_step() -> TFModelStep:
    """Return the default ``TSBertForWeightPrediction`` model step.

    Returns:
        A ``TFModelStep`` for ``TSBertForWeightPrediction`` with
        ``hidden_size=72``.
    """
    model_cls_path = cls_to_str(TSBertForWeightPrediction)
    # hidden_size must be the same as lookback_period.
    weight_hp = TSBertForWeightPrediction.HP(hidden_size=72)
    return TFModelStep(model_cls_str=model_cls_path, model_hp=weight_hp)