def _init(): env = environment(x,y,z,gamma, cutoffpenaltyscalar, rg_prob, turnspc, savepath) env.seed(seed + rank) return env
""" def _init(): env = environment(x,y,z,gamma, cutoffpenaltyscalar, rg_prob, turnspc, savepath) env.seed(seed + rank) return env set_global_seeds(seed) return _init if __name__ == '__main__': num_cpu = 15 # Number of processes to use # Create the vectorized environment env = SubprocVecEnv([make_env(x,y,z, i) for i in range(num_cpu)]) eval_env=environment(x, y, z, gamma, cutoffpenaltyscalar, rg_prob, turnspc, savepath) # Stable Baselines provides you with make_vec_env() helper # which does exactly the previous steps for you: # env = make_vec_env(env_id, n_envs=num_cpu, seed=0) #create callbacks to record data, initiate events during training. callbacklist=CallbackList([TimeLimit(episodetimesteps), EvalCallback(eval_env, log_path=savepath, n_eval_episodes=5 , deterministic=False, best_model_save_path=savepath)]) #create model with Stable Baselines package. model = A2C(CnnPolicy, env, gamma=gamma, n_steps=updatesteps, learning_rate=LR, verbose=1)#, tensorboard_log=scenario) model.learn(total_timesteps=episodetimesteps**50, callback=callbacklist) #total timesteps set to very large number so program will terminate based on runtime parameter)