Example #1
def test_main(tmp_path):
    """
    tests for the logger module
    """
    info("hi")
    debug("shouldn't appear")
    set_level(DEBUG)
    debug("should appear")
    configure(folder=str(tmp_path))
    record("a", 3)
    record("b", 2.5)
    dump()
    record("b", -2.5)
    record("a", 5.5)
    dump()
    info("^^^ should see a = 5.5")
    record_mean("b", -22.5)
    record_mean("b", -44.4)
    record("a", 5.5)
    dump()
    with ScopedConfigure(None, None):
        info("^^^ should see b = 33.3")

    with ScopedConfigure(str(tmp_path / "test-logger"), ["json"]):
        record("b", -2.5)
        dump()

    reset()
    record("a", "longasslongasslongasslongasslongasslongassvalue")
    dump()
    warn("hey")
    error("oh")
    record_dict({"test": 1})
Example #2

def test_main(tmp_path):
    """
    tests for the logger module
    """
    info("hi")
    debug("shouldn't appear")
    assert get_level() == INFO
    set_level(DEBUG)
    assert get_level() == DEBUG
    debug("should appear")
    configure(folder=str(tmp_path))
    assert get_dir() == str(tmp_path)
    record("a", 3)
    record("b", 2.5)
    dump()
    record("b", -2.5)
    record("a", 5.5)
    dump()
    info("^^^ should see a = 5.5")
    record("f", "this text \n \r should appear in one line")
    dump()
    info('^^^ should see f = "this text \n \r should appear in one line"')
    record_mean("b", -22.5)
    record_mean("b", -44.4)
    record("a", 5.5)
    dump()
    with ScopedConfigure(None, None):
        info("^^^ should see b = 33.3")

    with ScopedConfigure(str(tmp_path / "test-logger"), ["json"]):
        record("b", -2.5)
        dump()

    reset()
    record("a", "longasslongasslongasslongasslongasslongassvalue")
    dump()
    warn("hey")
    error("oh")
    record_dict({"test": 1})
    assert isinstance(get_log_dict(), dict) and set(get_log_dict().keys()) == {"test"}
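Example #2 is the same test with assertions on the accessor functions (get_level, get_dir, get_log_dict). Both examples rely on ScopedConfigure restoring the previous configuration when the with block exits. A minimal sketch of such a context manager, assuming the module keeps its active logger in a Logger.CURRENT handle and that configure() installs a new one; the names here are illustrative, not the library's confirmed internals:

# Sketch of a ScopedConfigure-style context manager; Logger.CURRENT and
# configure() are assumed module internals, shown only to illustrate the
# save/configure/restore behaviour the tests depend on.
class ScopedConfigure:
    def __init__(self, folder=None, format_strings=None):
        self.folder = folder
        self.format_strings = format_strings
        self.previous = None

    def __enter__(self):
        self.previous = Logger.CURRENT        # remember the active logger
        configure(folder=self.folder, format_strings=self.format_strings)

    def __exit__(self, *exc_info):
        Logger.CURRENT.close()                # close the temporary logger's files
        Logger.CURRENT = self.previous        # restore the previous logger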
Example #3
def test_main():
    """
    tests for the logger module
    """
    info("hi")
    debug("shouldn't appear")
    set_level(DEBUG)
    debug("should appear")
    folder = "/tmp/testlogging"
    if os.path.exists(folder):
        shutil.rmtree(folder)
    configure(folder=folder)
    logkv("a", 3)
    logkv("b", 2.5)
    dumpkvs()
    logkv("b", -2.5)
    logkv("a", 5.5)
    dumpkvs()
    info("^^^ should see a = 5.5")
    logkv_mean("b", -22.5)
    logkv_mean("b", -44.4)
    logkv("a", 5.5)
    dumpkvs()
    with ScopedConfigure(None, None):
        info("^^^ should see b = 33.3")

    with ScopedConfigure("/tmp/test-logger/", ["json"]):
        logkv("b", -2.5)
        dumpkvs()

    reset()
    logkv("a", "longasslongasslongasslongasslongasslongassvalue")
    dumpkvs()
    warn("hey")
    error("oh")
    logkvs({"test": 1})
Example #4

def train(env_id, num_timesteps, seed):
    # sess = util.single_threaded_session()
    # sess.__enter__()
    workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank()

    # Create a new base directory like /home/marco/Reinforcement_Learning/Logs/openai-2018-05-21-12-27

    log_dir = os.path.join(
        energyplus_logbase_dir(),
        datetime.datetime.now().strftime("openai-%Y-%m-%d-%H-%M"))
    if not os.path.exists(log_dir + '/output'):
        os.makedirs(log_dir + '/output')
    os.environ["ENERGYPLUS_LOG"] = log_dir
    model = os.getenv('ENERGYPLUS_MODEL')
    if model is None:
        print('Environment variable ENERGYPLUS_MODEL is not defined')
        exit()
    weather = os.getenv('ENERGYPLUS_WEATHER')
    if weather is None:
        print('Environment variable ENERGYPLUS_WEATHER is not defined')
        exit()

    # MPI is to parallelize training
    # Logs the training in a file log.txt in the given directory

    rank = MPI.COMM_WORLD.Get_rank()
    if rank == 0:
        print('train: init logger with dir={}'.format(log_dir))  # XXX
        logger.configure(log_dir)
    else:
        logger.configure(format_strings=[])
        logger.set_level(logger.DISABLED)

    # Make Gym environment:

    env = make_energyplus_env(env_id, workerseed)

    ###### EXPERIMENTS FROM FIRST PAPER: ###########################################
    #
    # trpo_mpi.learn(env,  policy_fn,
    #                max_timesteps=num_timesteps,
    #                timesteps_per_batch=16*1024, max_kl=0.01, cg_iters=10, cg_damping=0.1,
    #                gamma=0.99, lam=0.98, vf_iters=5, vf_stepsize=1e-3)

    # Apply TRPO algorithm from OpenAI baselines:

    # action_noise = NormalActionNoise(mean=np.zeros(4), sigma=0.1 * np.ones(4))
    #
    # policy_kwargs_tqc = dict(n_critics=2, n_quantiles=25)
    # model_tqc = TQC("MlpPolicy", env, top_quantiles_to_drop_per_net=2
    #                 , verbose=1, policy_kwargs=policy_kwargs_tqc)
    #
    # model_ppo = PPO('MlpPolicy', env, verbose=1, n_steps=4096, batch_size=64, n_epochs=15)
    # model_td3 = TD3('MlpPolicy', env, verbose=1, action_noise=action_noise)
    # model_sac = SAC('MlpPolicy', env, verbose=1)
    # model_ppolstm = PPO2(MlpLstmPolicy, env, verbose=1,n_steps=27, nminibatches=1)
    #
    # # Change the algorithm here:
    #
    # model_ppolstm.learn(total_timesteps=num_timesteps, log_interval=1, reset_num_timesteps=False)
    # # model_ppo.learning_rate = 0
    # # model_ppo.learn(total_timesteps=35040, reset_num_timesteps=False)
    #
    #####################################EXPERIMENTS 2: ###################################

    sac_v2_lstm(env, num_timesteps, train=True, test=False)
    #slac(env, num_timesteps)

    env.close()
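train() configures the logger only on MPI rank 0 (the other ranks get an empty format list and the DISABLED level, so a single process writes the run's log files), builds the EnergyPlus Gym environment, and hands it to the SAC-LSTM trainer. A minimal driver, assuming ENERGYPLUS_MODEL and ENERGYPLUS_WEATHER are already exported; the env id, timestep budget, and seed below are illustrative defaults, not values from the original script.

# Hypothetical entry point for calling train(); all defaults are assumptions.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--env", default="EnergyPlus-v0")      # assumed env id
    parser.add_argument("--num-timesteps", type=int, default=1000000)
    parser.add_argument("--seed", type=int, default=0)
    args = parser.parse_args()

    train(args.env, args.num_timesteps, args.seed)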