Example #1
import os

from stable_baselines3 import TD3, DDPG, PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv

# ENV, PARAM, DT, MODEL_PATH and EvalCallback are defined elsewhere in the project;
# the EvalCallback used here accepts a save_freq argument and exposes
# best_mean_reward_overall / best_mean_reward_timestep, so it is assumed to be a
# customized version of the stable-baselines3 callback (see the sketch after this example).


def train():
    save_path = "model_save/" + MODEL_PATH + "/"
    os.makedirs(save_path, exist_ok=True)

    # log_dir = f"model_save/"
    log_dir = save_path
    env, env_eval = ENV(util='train', par=PARAM, dt=DT), ENV(util='val', par=PARAM, dt=DT)
    env, env_eval = Monitor(env, log_dir), Monitor(env_eval, log_dir)
    env, env_eval = DummyVecEnv([lambda: env]), DummyVecEnv([lambda: env_eval])
    # env = VecNormalize(env, norm_obs=True, norm_reward=True,
    #                clip_obs=10.)

    if PARAM['algo']=='td3':
        model = TD3('MlpPolicy', env, verbose=1, batch_size=PARAM['batch_size'], seed=PARAM['seed'],
                    learning_starts=PARAM['learning_starts'])
    elif PARAM['algo']=='ddpg':
        model = DDPG('MlpPolicy', env, verbose=1, batch_size=PARAM['batch_size'], seed=PARAM['seed'],
                     learning_starts=PARAM['learning_starts'])
    elif PARAM['algo']=='ppo':
        model = PPO('MlpPolicy', env, verbose=1, batch_size=PARAM['batch_size'], seed=PARAM['seed'])

    eval_callback = EvalCallback(env_eval, best_model_save_path=save_path+MODEL_PATH+'_best_model',
                                 log_path=log_dir, eval_freq=PARAM['eval_freq'], save_freq=PARAM['save_freq'],
                                 deterministic=True, render=False)

    model.learn(total_timesteps=int(PARAM['total_time_step']), callback=eval_callback, log_interval=500)
    print("best mean reward:", eval_callback.best_mean_reward_overall, "timesteps:", eval_callback.best_mean_reward_timestep)
    model.save(save_path+MODEL_PATH+'_final_timesteps')
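The call above passes save_freq to EvalCallback and later reads eval_callback.best_mean_reward_overall and eval_callback.best_mean_reward_timestep; the stock stable-baselines3 EvalCallback has neither, so the project evidently subclasses it. A minimal sketch of what such a subclass could look like (the class name TimestampedEvalCallback and the save_freq handling are assumptions, not the project's actual code):

from stable_baselines3.common.callbacks import EvalCallback


class TimestampedEvalCallback(EvalCallback):
    """EvalCallback that also remembers when the best evaluation happened."""

    def __init__(self, *args, save_freq=None, **kwargs):
        # save_freq is accepted only so the call in train() works with this sketch;
        # the project's real callback presumably also uses it to save checkpoints.
        super().__init__(*args, **kwargs)
        self.save_freq = save_freq
        self.best_mean_reward_overall = -float("inf")
        self.best_mean_reward_timestep = 0

    def _on_step(self) -> bool:
        continue_training = super()._on_step()
        # The parent updates self.best_mean_reward after every evaluation round.
        if self.best_mean_reward > self.best_mean_reward_overall:
            self.best_mean_reward_overall = self.best_mean_reward
            self.best_mean_reward_timestep = self.num_timesteps
        return continue_training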
Example #2
import pandas as pd

from stable_baselines3 import TD3, DDPG, PPO
from stable_baselines3.common.monitor import Monitor

# ENV, PARAM, DT, MODEL_PATH and TEST_STOCK_NUM are defined elsewhere in the project.


def test(MODEL_TEST):
    log_dir = "model_save/" + MODEL_PATH + "/" + MODEL_PATH + MODEL_TEST

    env = ENV(util='test', par=PARAM, dt=DT)
    env.render = True
    env = Monitor(env, log_dir)

    if PARAM['algo']=='td3':
        model = TD3.load(log_dir)
    elif PARAM['algo']=='ddpg':
        model = DDPG.load(log_dir)
    elif PARAM['algo']=='ppo':
        model = PPO.load(log_dir)

    # plot_results(f"model_save/")
    trade_dt = pd.DataFrame([])     # trade_dt: per-step trading data for every stock
    result_dt = pd.DataFrame([])    # result_dt: one-year test results for every stock
    for i in range(TEST_STOCK_NUM):
        state = env.reset()
        stock_bh_id = 'stock_bh_'+str(i)            # buy-and-hold curve of this stock
        stock_port_id = 'stock_port_'+str(i)        # portfolio value of this stock
        stock_action_id = 'stock_action_' + str(i)  # actions taken on this stock
        flow_L_id = 'stock_flow_' + str(i)          # cash-flow record of this stock
        stock_bh_dt, stock_port_dt, action_policy_dt, flow_L_dt = [], [], [], []
        day = 0
        while True:
            action = model.predict(state)
            next_state, reward, done, info = env.step(action[0])
            state = next_state
            # print("trying:",day,"reward:", reward,"now profit:",env.profit)   # 测试每一步的交易policy
            stock_bh_dt.append(env.buy_hold)
            stock_port_dt.append(env.Portfolio_unit)
            action_policy_dt.append(action[0][0])  # record the policy action
            flow_L_dt.append(env.flow)
            day+=1
            if done:
                print('stock: {}, total profit: {:.2f}%, buy hold: {:.2f}%, sp: {:.4f}, mdd: {:.2f}%, romad: {:.4f}'
                      .format(i, env.profit*100, env.buy_hold*100, env.sp, env.mdd*100, env.romad))
                # after the episode, record: stock ID, profit (%), buy & hold (%), Sharpe ratio, max drawdown (%), RoMaD
                result=pd.DataFrame([[i,env.profit*100,env.buy_hold*100,env.sp,env.mdd*100,env.romad]])
                break

        trade_dt_stock = pd.DataFrame({stock_port_id: stock_port_dt,
                                       stock_bh_id: stock_bh_dt,
                                       stock_action_id: action_policy_dt,
                                       flow_L_id: flow_L_dt})  # trading data of this stock

        trade_dt = pd.concat([trade_dt, trade_dt_stock], axis=1)    # merge trading data across stocks (new columns)
        result_dt = pd.concat([result_dt, result], axis=0)          # append this stock's result row (new rows)

    result_dt.columns = ['stock_id', 'profit(%)', 'buy_hold(%)', 'sp', 'mdd(%)', 'romad']
    trade_dt.to_csv('out_dt/trade_'+MODEL_PATH+'.csv',index=False)
    result_dt.to_csv('out_dt/result_'+MODEL_PATH+'.csv',index=False)
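Once test() has run, the per-stock summary can be aggregated from the CSV it writes. A minimal sketch, assuming the column names set above; summarize_results is an illustrative helper, not part of the project:

import pandas as pd


def summarize_results(model_path):
    # Read the per-stock results written by test() and print portfolio-level averages.
    result = pd.read_csv('out_dt/result_' + model_path + '.csv')
    print('mean profit:    {:.2f}%'.format(result['profit(%)'].mean()))
    print('mean buy&hold:  {:.2f}%'.format(result['buy_hold(%)'].mean()))
    print('mean sharpe:    {:.4f}'.format(result['sp'].mean()))
    print('mean romad:     {:.4f}'.format(result['romad'].mean()))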
Example #3
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv

# ENV, PARAM and MODEL_PATH are defined elsewhere in the project;
# SaveOnBestTrainingRewardCallback is sketched after this example.


def train():
    log_dir = "model_save/"
    env = ENV(istest=False)
    env = Monitor(env, log_dir)
    env = DummyVecEnv([lambda: env])
    # env = VecNormalize(env, norm_obs=True, norm_reward=True,
    #                clip_obs=10.)

    model = PPO('MlpPolicy',
                env,
                verbose=1,
                batch_size=PARAM['batch_size'],
                seed=PARAM['seed'])
    callback = SaveOnBestTrainingRewardCallback(check_freq=480,
                                                log_dir=log_dir)
    model.learn(total_timesteps=int(PARAM['total_time_step']),
                callback=callback,
                log_interval=480)
    model.save('model_save/' + MODEL_PATH)
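Example #3 relies on SaveOnBestTrainingRewardCallback, which is not shown above. A minimal sketch, closely following the custom-callback example from the stable-baselines3 documentation (the project's own version may differ):

import os

import numpy as np
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.results_plotter import load_results, ts2xy


class SaveOnBestTrainingRewardCallback(BaseCallback):
    """Save the model whenever the rolling mean training reward improves.

    Relies on the Monitor wrapper writing episode statistics into log_dir.
    """

    def __init__(self, check_freq, log_dir, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.log_dir = log_dir
        self.save_path = os.path.join(log_dir, 'best_model')
        self.best_mean_reward = -np.inf

    def _init_callback(self) -> None:
        os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self) -> bool:
        if self.n_calls % self.check_freq == 0:
            # Mean reward over the last 100 episodes recorded by Monitor
            x, y = ts2xy(load_results(self.log_dir), 'timesteps')
            if len(x) > 0:
                mean_reward = np.mean(y[-100:])
                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    if self.verbose > 0:
                        print("Saving new best model to", self.save_path)
                    self.model.save(self.save_path)
        return True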