import gym
from stable_baselines.common.vec_env import VecCheckNan
from stable_baselines.common import make_vec_env
from stable_baselines import PPO2

# Build the custom environment and wrap it so NaN observations/rewards raise immediately
env = make_vec_env('CMuRL_Env:CMuRL-Env-v0')
env = VecCheckNan(env, raise_exception=True)

# Load the trained agent
model = PPO2.load('CMuRL_Model_v6')

# Run the policy in the environment indefinitely
obs = env.reset()
while True:
    action, _states = model.predict(obs)
    obs, rewards, _, _ = env.step(action)
    env.render()
import gym
from os import listdir
from stable_baselines.common.vec_env import DummyVecEnv, VecCheckNan
from stable_baselines import PPO2

# Register and build the custom environment, then wrap it for vectorized training
gym.register(id="rustyblocks-v0", entry_point="custom_env:RustyBlocksEnv")
env = gym.make("rustyblocks-v0")
env.max_invalid_tries = 7
env = VecCheckNan(DummyVecEnv([lambda: env]))

begin = 0
step_size = int(2e4)

# Resume from the highest-numbered checkpoint in the model directory
# dirname = "models/"
# logdirname = "boardlog/"
# modeldir = "ppo2boxbestparam/"
# file_step = "2e4-"
# for f in listdir(dirname + modeldir):
#     if f.startswith(file_step):
#         end = f[len(file_step):]
#         num = int(end[:len(end) - 4])
#         if num > begin:
#             begin = num

# seed 420420420
# Instantiate the agent
# model = PPO2('MlpPolicy', env, verbose=1, max_grad_norm=1.42481794257356,
#              cliprange=1.36870169927419, vf_coef=0.487354638658612,
#              ent_coef=0.000130839434944482, gamma=0.993211512071304,
#              lam=0.92669713813749, learning_rate=0.00150606967404027,
#              n_steps=709, noptepochs=35, nminibatches=1)

# Train the agent, starting from the pretrained checkpoint
print("STARTING TO LEARN", begin)
model = PPO2.load("models/pretrain/50000_heutistic_pretrain_discrete.pkl", env=env)
while True:
    env.reset()
    model.learn(total_timesteps=step_size, log_interval=100)
    begin += 1
    # Save the agent
    # model.save(dirname + modeldir + file_step + str(begin))
    print("saved learnstep", begin, "total iterations are now", begin * step_size)