import time
from collections import Counter, defaultdict

import gym

# from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import DDPG
from stable_baselines.ddpg.policies import MlpPolicy

if __name__ == '__main__':
    # Create and wrap the environment
    # env = gym.make('game-stock-exchange-v0')
    env = gym.make('game-stock-exchange-continuous-v0')
    env = DummyVecEnv([lambda: env])

    model = DDPG(MlpPolicy, env, verbose=1)
    # model = A2C.load("a2c_gym_exchange_continuous", env=env)
    # note: stable_baselines' DDPG takes actor_lr/critic_lr at construction,
    # so assigning learning_rate here may not affect training
    model.learning_rate = 1e-7

    # Train the agent
    model.learn(total_timesteps=100000)  # default=1000000

    # Save the agent
    model.save("ddpg_gym_exchange_continuous")
    # del model  # delete trained model to demonstrate loading

    # Load the trained agent
    model = DDPG.load("ddpg_gym_exchange_continuous", env=env)

    # Enjoy trained agent
    obs = env.reset()
    actions = Counter()
    pnl = defaultdict(float)
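    # Assumed continuation (the excerpt ends at the setup above): a minimal rollout
    # loop in the usual stable_baselines predict/step style. How `actions` and `pnl`
    # are filled is a guess based on their names, not taken from the original script.
    for _ in range(1000):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        actions[str(action[0])] += 1       # tally the chosen (continuous) actions
        pnl["total"] += float(rewards[0])  # accumulate reward as a rough PnL proxy
        if dones[0]:
            obs = env.reset()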
            # (the snippet starts mid-statement: the parameter list is being written to an open file `f`)
            str([
                my_signal_rate, my_signal_repetitions, my_step_limit,
                lr_start, lr_end, timesteps
            ]))
    f.close()
except:
    print("env parameters couldn't be saved. They are:" + str([
        my_signal_rate, my_signal_repetitions, my_step_limit,
        lr_start, lr_end, timesteps
    ]))

lr_update_interval = 500
model_save_interval = 20000
modulo_number = model_save_interval // lr_update_interval  # save a checkpoint every 40 learn() calls

i = 0
while i <= (timesteps / lr_update_interval):
    # linear: model.learning_rate = lr_start - (lr_stepsize * (i + pretraining_iterations))
    # log:
    model.learning_rate = lr_start * 0.5 ** ((i * lr_update_interval) * (10 / timesteps))
    # static: model.learning_rate = static_learning_rate
    model.learn(total_timesteps=lr_update_interval, tb_log_name=name,
                log_interval=10, reset_num_timesteps=False)
    if i % modulo_number == (modulo_number - 1):
        model.save("/media/ryuga/TOSHIBA EXT/BA/Models/" + name + "_"
                   + str(i // modulo_number))  # integer division so checkpoints get whole-number indices
    i += 1
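# Sanity-check sketch (not part of the original script): the active "log" schedule
# above halves the learning rate once every timesteps/10 trained timesteps, i.e. it
# decays by a factor of 2**10 = 1024 over the whole run. The demo_lr_start and
# demo_timesteps values below are placeholders, not taken from the original.
demo_lr_start, demo_timesteps = 1e-4, 200000
for trained in range(0, demo_timesteps + 1, demo_timesteps // 10):
    lr = demo_lr_start * 0.5 ** (trained * (10 / demo_timesteps))
    print("after", trained, "timesteps: learning_rate =", lr)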