def train_ddpg():
    """Train a DDPG agent with a CNN policy and save it under ``model_save/``.

    The training environment is ``ENV(istest=False)`` wrapped in ``Monitor``
    (logging into ``model_save/``) and then in a single-environment
    ``DummyVecEnv``. ``policy_kwargs`` is read from module scope.

    Training runs for 1,000,000 timesteps with ``learning_starts=500000`` and
    a ``SaveOnBestTrainingRewardCallback`` configured with ``check_freq=480``.
    After training, the final model is written to ``model_save/ddpg_cnn``.
    """
    # Function-local import keeps this block self-contained.
    import os

    log_dir = "model_save/"
    # Make sure the log directory exists before Monitor and the callback are
    # pointed at it, so the monitor file is written inside the directory.
    os.makedirs(log_dir, exist_ok=True)

    # Keep the monitored env under its own name: the factory lambda below must
    # not close over a variable that is rebound to the vectorized wrapper.
    monitored_env = Monitor(ENV(istest=False), log_dir)
    env = DummyVecEnv([lambda: monitored_env])
    # Optional normalization, currently disabled:
    # env = VecNormalize(env, norm_obs=True, norm_reward=True,
    #                    clip_obs=10.)

    model = DDPG(
        "CnnPolicy",
        env,
        policy_kwargs=policy_kwargs,
        verbose=1,
        batch_size=2048,
        seed=1,
        learning_starts=500000,
    )
    callback = SaveOnBestTrainingRewardCallback(check_freq=480, log_dir=log_dir)
    model.learn(total_timesteps=1_000_000, callback=callback, log_interval=480)
    model.save('model_save/ddpg_cnn')
def test_ddpg():
    """Evaluate the saved best DDPG model on the test environment.

    Builds ``ENV(istest=True)``, sets its ``render`` attribute to ``True``
    (presumably a flag the environment reads -- confirm against ``ENV``),
    wraps it in ``Monitor`` and loads the model from
    ``model_save/best_model_ddpg_cnn``. ``plot_results`` is called on
    ``model_save/`` before the rollouts start.

    Ten episodes are then played to completion, and after each one the
    environment's ``profit`` and ``buy_hold`` attributes are printed.
    """
    model_path = "model_save/best_model_ddpg_cnn"

    raw_env = ENV(istest=True)
    raw_env.render = True
    # The same path serves as the Monitor log target and the model checkpoint.
    env = Monitor(raw_env, model_path)
    model = DDPG.load(model_path)
    plot_results("model_save/")

    for episode in range(10):
        observation = env.reset()
        done = False
        while not done:
            # predict() returns a tuple; only its first element (the action)
            # is passed on to the environment.
            chosen_action = model.predict(observation)[0]
            observation, _reward, done, _info = env.step(chosen_action)
        print('stock', episode, ' total profit=', env.profit, ' buy hold=', env.buy_hold)