def test_main(tmp_path): """ tests for the logger module """ info("hi") debug("shouldn't appear") set_level(DEBUG) debug("should appear") configure(folder=str(tmp_path)) record("a", 3) record("b", 2.5) dump() record("b", -2.5) record("a", 5.5) dump() info("^^^ should see a = 5.5") record_mean("b", -22.5) record_mean("b", -44.4) record("a", 5.5) dump() with ScopedConfigure(None, None): info("^^^ should see b = 33.3") with ScopedConfigure(str(tmp_path / "test-logger"), ["json"]): record("b", -2.5) dump() reset() record("a", "longasslongasslongasslongasslongasslongassvalue") dump() warn("hey") error("oh") record_dict({"test": 1})
def test_main(tmp_path): """ tests for the logger module """ info("hi") debug("shouldn't appear") assert get_level() == INFO set_level(DEBUG) assert get_level() == DEBUG debug("should appear") configure(folder=str(tmp_path)) assert get_dir() == str(tmp_path) record("a", 3) record("b", 2.5) dump() record("b", -2.5) record("a", 5.5) dump() info("^^^ should see a = 5.5") record("f", "this text \n \r should appear in one line") dump() info('^^^ should see f = "this text \n \r should appear in one line"') record_mean("b", -22.5) record_mean("b", -44.4) record("a", 5.5) dump() with ScopedConfigure(None, None): info("^^^ should see b = 33.3") with ScopedConfigure(str(tmp_path / "test-logger"), ["json"]): record("b", -2.5) dump() reset() record("a", "longasslongasslongasslongasslongasslongassvalue") dump() warn("hey") error("oh") record_dict({"test": 1}) assert isinstance(get_log_dict(), dict) and set(get_log_dict().keys()) == {"test"}
def test_main(): """ tests for the logger module """ info("hi") debug("shouldn't appear") set_level(DEBUG) debug("should appear") folder = "/tmp/testlogging" if os.path.exists(folder): shutil.rmtree(folder) configure(folder=folder) logkv("a", 3) logkv("b", 2.5) dumpkvs() logkv("b", -2.5) logkv("a", 5.5) dumpkvs() info("^^^ should see a = 5.5") logkv_mean("b", -22.5) logkv_mean("b", -44.4) logkv("a", 5.5) dumpkvs() with ScopedConfigure(None, None): info("^^^ should see b = 33.3") with ScopedConfigure("/tmp/test-logger/", ["json"]): logkv("b", -2.5) dumpkvs() reset() logkv("a", "longasslongasslongasslongasslongasslongassvalue") dumpkvs() warn("hey") error("oh") logkvs({"test": 1})
def train(env_id, num_timesteps, seed):
    # sess = util.single_threaded_session()
    # sess.__enter__()
    workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank()

    # Create a new base directory like //home/marco/Reinforcement_Learning/Logs/openai-2018-05-21-12-27
    log_dir = os.path.join(
        energyplus_logbase_dir(),
        datetime.datetime.now().strftime("openai-%Y-%m-%d-%H-%M"))
    if not os.path.exists(log_dir + '/output'):
        os.makedirs(log_dir + '/output')
    os.environ["ENERGYPLUS_LOG"] = log_dir

    model = os.getenv('ENERGYPLUS_MODEL')
    if model is None:
        print('Environment variable ENERGYPLUS_MODEL is not defined')
        exit()

    weather = os.getenv('ENERGYPLUS_WEATHER')
    if weather is None:
        print('Environment variable ENERGYPLUS_WEATHER is not defined')
        exit()

    # MPI is used to parallelize training.
    # The training is logged to a log.txt file in the given directory.
    rank = MPI.COMM_WORLD.Get_rank()
    if rank == 0:
        print('train: init logger with dir={}'.format(log_dir))  # XXX
        logger.configure(log_dir)
    else:
        logger.configure(format_strings=[])
        logger.set_level(logger.DISABLED)

    # Make Gym environment:
    env = make_energyplus_env(env_id, workerseed)

    ###### EXPERIMENTS FROM FIRST PAPER: ###########################################
    #
    # trpo_mpi.learn(env, policy_fn,
    #                max_timesteps=num_timesteps,
    #                timesteps_per_batch=16*1024, max_kl=0.01, cg_iters=10, cg_damping=0.1,
    #                gamma=0.99, lam=0.98, vf_iters=5, vf_stepsize=1e-3)
    # Apply TRPO algorithm from OpenAI baselines:
    # action_noise = NormalActionNoise(mean=np.zeros(4), sigma=0.1 * np.ones(4))
    #
    # policy_kwargs_tqc = dict(n_critics=2, n_quantiles=25)
    # model_tqc = TQC("MlpPolicy", env, top_quantiles_to_drop_per_net=2,
    #                 verbose=1, policy_kwargs=policy_kwargs_tqc)
    #
    # model_ppo = PPO('MlpPolicy', env, verbose=1, n_steps=4096, batch_size=64, n_epochs=15)
    # model_td3 = TD3('MlpPolicy', env, verbose=1, action_noise=action_noise)
    # model_sac = SAC('MlpPolicy', env, verbose=1)
    # model_ppolstm = PPO2(MlpLstmPolicy, env, verbose=1, n_steps=27, nminibatches=1)
    #
    # # Change the algorithm here:
    # # model_ppolstm.learn(total_timesteps=num_timesteps, log_interval=1, reset_num_timesteps=False)
    # # model_ppo.learning_rate = 0
    # # model_ppo.learn(total_timesteps=35040, reset_num_timesteps=False)
    #
    ##################################### EXPERIMENTS 2: ###################################

    sac_v2_lstm(env, num_timesteps, train=True, test=False)
    # slac(env, num_timesteps)

    env.close()
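# A hypothetical command-line entry point for train() above; the flag names and
# defaults are illustrative assumptions and not the original script's interface.
def main():
    import argparse

    parser = argparse.ArgumentParser(
        description="Train an RL agent on an EnergyPlus Gym environment")
    parser.add_argument("--env", default="EnergyPlus-v0",
                        help="Gym environment id (assumed default)")
    parser.add_argument("--num-timesteps", type=int, default=1000000)
    parser.add_argument("--seed", type=int, default=0)
    args = parser.parse_args()
    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)


if __name__ == "__main__":
    main()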