# NOTE(review): orphaned duplicate of the fragment on the following lines,
# recovered from a whitespace-mangled paste. It begins mid-way through the
# enclosing `make_env(...)` closure (the `def` header is outside the visible
# region) and is truncated mid-call (`EvalCallback(env1, log_path=savepath,`
# has no closing arguments). Left byte-identical; presumably superseded by
# the fuller variant below — TODO confirm which copy is authoritative.
env = environment(x, y, z, gamma, turnspc, scalar, policyname) env.seed(seed + rank) return env set_global_seeds(seed) return _init if __name__ == '__main__': num_cpu = ncpu # Number of processes to use # Create the vectorized environment env = SubprocVecEnv([make_env(x, y, z, i) for i in range(num_cpu)]) eval_env = evalenv(x, y, z, gamma, turnspc, policyname) env1 = environment(x, y, z, gamma, turnspc, scalar, policyname) #env annealreate/ numturns*eval_freq # Stable Baselines provides you with make_vec_env() helper # which does exactly the previous steps for you: # env = make_vec_env(env_id, n_envs=num_cpu, seed=0) #create callbacks to record data, initiate events during training. callbacklist = CallbackList([ TimeLimit(episodetimesteps), EvalCallback(eval_env, log_path=evpath, n_eval_episodes=100, eval_freq=50000, deterministic=False, best_model_save_path=evpath), EvalCallback(env1, log_path=savepath,
def _init():
    """Build and seed one worker environment for a vectorized env.

    Relies on ``x, y, z, gamma, turnspc, scalar, policyname, seed, rank``
    captured from the enclosing ``make_env`` scope; ``seed + rank`` gives
    each subprocess a distinct but reproducible seed.
    """
    worker_env = environment(x, y, z, gamma, turnspc, scalar, policyname)
    worker_env.seed(seed + rank)
    return worker_env
# NOTE(review): recovered from a whitespace-mangled single-line paste. The
# enclosing `make_env(...)` def header precedes this fragment and the `else:`
# branch of the final `if` follows it, both outside the visible region, so the
# indentation below is reconstructed; all code tokens are unchanged.
    def _init():
        # Each worker gets a distinct, reproducible seed via its rank offset.
        env = environment(x, y, z, gamma, turnspc, policyname)
        env.seed(seed + rank)
        return env
    set_global_seeds(seed)
    return _init


if __name__ == '__main__':
    num_cpu = ncpu  # Number of processes to use
    # Create the vectorized environment
    env = SubprocVecEnv([make_env(x,y,z, i) for i in range(num_cpu)])
    # Separate single (non-vectorized) environments used only for evaluation.
    eval_env = evalenv(x, y, z, gamma, turnspc, policyname)
    # env annealrate / (numturns * eval_freq) — presumably
    # 10000 * num_turns / eval_freq with num_turns ≈ 20*x*y*z*turnspc;
    # TODO confirm against the `environment` constructor's annealrate contract.
    env1 = environment(x, y, z, gamma, turnspc, policyname,
                       annealrate=10000*(20*x*y*z*turnspc)/50000)
    # Stable Baselines provides you with make_vec_env() helper
    # which does exactly the previous steps for you:
    # env = make_vec_env(env_id, n_envs=num_cpu, seed=0)

    # Create callbacks to record data and initiate events during training:
    # a per-episode step limit plus two periodic evaluators writing logs and
    # best-model checkpoints to their respective paths.
    callbacklist = CallbackList([
        TimeLimit(episodetimesteps),
        EvalCallback(eval_env, log_path=evpath, n_eval_episodes=100,
                     eval_freq=50000, deterministic=False,
                     best_model_save_path=evpath),
        EvalCallback(env1, log_path=savepath, n_eval_episodes=20,
                     eval_freq=50000, deterministic=False,
                     best_model_save_path=savepath)])

    # Resume from a previously saved best model if one exists.
    if (os.path.exists("%s/best_model.zip" % savepath)):
        # Instantiate the agent
        model = A2C(policy, env, gamma=gamma, n_steps=episodetimesteps,
                    learning_rate=LR, verbose=1, n_cpu_tf_sess=num_cpu)
        # Load the trained agent
        # NOTE(review): the A2C(...) instance above is immediately discarded
        # by this load; presumably kept for symmetry with the else branch
        # (outside the visible region) — confirm before removing.
        model = A2C.load("%s/best_model" % savepath, env=env)
        print('loaded agent')