Code example #1
0
    """
    def _init():
        
        env = environment(x, y, z, gamma, turnspc, penaltyscalar, policyname)
        env.seed(seed + rank)
        return env
    set_global_seeds(seed)
    return _init


if __name__ == '__main__':

    num_cpu = ncpu  # Number of worker processes for the vectorized env
    # Create the vectorized environment: one subprocess per worker, each
    # built by the make_env(...) factory defined above.
    env = SubprocVecEnv([make_env(x,y,z, i) for i in range(num_cpu)])
    # NOTE(review): unlike the env constructor below, this evalenv call
    # omits penaltyscalar — confirm evalenv's signature (the second code
    # example on this page passes penaltyscalar as well).
    eval_env=evalenv(x, y, z, gamma, turnspc, policyname)
    # Separate single (non-vectorized) environment used by the second
    # EvalCallback below, saving to savepath.
    env1 =environment(x, y, z, gamma, turnspc, penaltyscalar, policyname) #env annealreate/ numturns*eval_freq
    # Stable Baselines provides you with make_vec_env() helper
    # which does exactly the previous steps for you:
    # env = make_vec_env(env_id, n_envs=num_cpu, seed=0)

    
    # Callbacks: cap episode length, plus two periodic evaluations
    # (every 50k steps) that each keep their own best-model checkpoint —
    # 100 episodes on eval_env -> evpath, 20 episodes on env1 -> savepath.
    callbacklist=CallbackList([TimeLimit(episodetimesteps), EvalCallback(eval_env, log_path=evpath, n_eval_episodes=100, eval_freq=50000
                                                                         , deterministic=False, best_model_save_path=evpath), EvalCallback(env1, log_path=savepath, n_eval_episodes=20, eval_freq=50000
                                                                         , deterministic=False, best_model_save_path=savepath)])
    # Resume from a previous run when a saved best model exists.
    if (os.path.exists("%s/best_model.zip" % savepath)):
        # Instantiate the agent
        # NOTE(review): this freshly constructed A2C is immediately
        # replaced by A2C.load on the next line, so the construction
        # appears redundant — presumably dead code; verify before removing.
        model = A2C(policy, env, gamma=gamma, n_steps=episodetimesteps, learning_rate=LR,  verbose=1, n_cpu_tf_sess=num_cpu)
        # Load the trained agent
        model = A2C.load("%s/best_model" % savepath, env=env)
Code example #2
0
    def _init():

        env = environment(x, y, z, gamma, turnspc, penaltyscalar, policyname)
        env.seed(seed + rank)
        return env

    set_global_seeds(seed)
    return _init


if __name__ == '__main__':

    num_cpu = ncpu  # Number of worker processes for the vectorized env
    # Create the vectorized environment: one subprocess per worker, each
    # built by the make_env(...) factory defined above.
    env = SubprocVecEnv([make_env(x, y, z, i) for i in range(num_cpu)])
    # Separate single evaluation environment (here evalenv receives
    # penaltyscalar, unlike the first code example on this page).
    eval_env = evalenv(x, y, z, gamma, turnspc, penaltyscalar, policyname)
    #env1 =environment(x, y, z, gamma, turnspc, policyname)
    # Stable Baselines provides you with make_vec_env() helper
    # which does exactly the previous steps for you:
    # env = make_vec_env(env_id, n_envs=num_cpu, seed=0)

    # Create callbacks to record data and initiate events during training.
    callbacklist = CallbackList([
        TimeLimit(episodetimesteps),
        EvalCallback(eval_env,
                     log_path=evpath,
                     n_eval_episodes=100,
                     eval_freq=50000,
                     deterministic=False,
                     best_model_save_path=evpath),
        EvalCallback(env,