コード例 #1
0
 def _init():
     """Build one ``environment`` instance, seed it, and return it.

     All constructor arguments, as well as ``seed`` and ``rank``, are read
     from the enclosing scope rather than passed in.
     """
     instance = environment(
         x, y, z, gamma, cutoffpenaltyscalar, rg_prob, turnspc, savepath
     )
     # The seed actually applied is the base seed offset by ``rank``.
     instance.seed(seed + rank)
     return instance
コード例 #2
0
    """
    def _init():
        """Construct, seed and return a single ``environment`` object.

        Takes no arguments: the constructor parameters, ``seed`` and ``rank``
        all come from the surrounding scope.
        """
        created = environment(
            x, y, z, gamma, cutoffpenaltyscalar, rg_prob, turnspc, savepath
        )
        # Seed with the base seed shifted by ``rank``.
        created.seed(seed + rank)
        return created
    set_global_seeds(seed)
    return _init


if __name__ == '__main__':

    # Number of worker processes backing the vectorized environment.
    num_cpu = 15

    # One factory per worker; the loop index is passed as the final
    # make_env argument.
    env = SubprocVecEnv([make_env(x, y, z, rank) for rank in range(num_cpu)])

    # Separate, non-vectorized environment handed to the evaluation callback.
    eval_env = environment(
        x, y, z, gamma, cutoffpenaltyscalar, rg_prob, turnspc, savepath
    )

    # Note: Stable Baselines also ships a make_vec_env() helper that performs
    # the vectorized-environment setup above in a single call.

    # Callbacks that record data and trigger events during training.
    # Evaluation runs 5 episodes with deterministic=False, and both the
    # evaluation log and the best model are written under savepath.
    callbacklist = CallbackList([
        TimeLimit(episodetimesteps),
        EvalCallback(
            eval_env,
            log_path=savepath,
            n_eval_episodes=5,
            deterministic=False,
            best_model_save_path=savepath,
        ),
    ])

    # Build the A2C model from the Stable Baselines package.
    # No tensorboard_log argument is passed.
    model = A2C(
        CnnPolicy,
        env,
        gamma=gamma,
        n_steps=updatesteps,
        learning_rate=LR,
        verbose=1,
    )

    # total_timesteps is deliberately set to a very large number so that the
    # program terminates based on the runtime parameter instead (presumably
    # enforced through the TimeLimit callback -- confirm against its
    # definition).
    model.learn(total_timesteps=episodetimesteps**50, callback=callbacklist)