def test_calibrated_agent(self):
    """Does the calibrated agent work as expected?

    Runs the calibrated agent on a ContinuousTwoZoneEngine and checks the
    norms of the resulting trajectories against stored regression values.
    """
    # Initialize engine
    eng = engines.ContinuousTwoZoneEngine(
        nsteps=100,
        use_qdot=True,
        fuel="PRF100",
        rxnmech="llnl_gasoline_surrogate_323.xml",
        ename="Isooctane_MBT_DI_50C_Summ.xlsx",
    )

    # Initialize the agent
    env = DummyVecEnv([lambda: eng])
    agent = agents.CalibratedAgent(env)
    agent.learn()

    # Evaluate the agent (total reward is not checked here, only df norms)
    t0 = time.time()
    df, _ = utilities.evaluate_agent(env, agent)
    elapsed = time.time() - t0
    utilities.plot_df(env, df, idx=0, name="calibrated")

    # Test: regression values for the evaluated trajectory
    npt.assert_allclose(np.linalg.norm(df.V), 0.002195212151)
    npt.assert_allclose(np.linalg.norm(df.p), 22012100.17143623)
    npt.assert_allclose(np.linalg.norm(df["T"]), 14210.47662980)
    npt.assert_allclose(np.linalg.norm(df.rewards), 104.47362155)
    npt.assert_allclose(np.linalg.norm(df.mdot), 0.04144044)
    npt.assert_allclose(np.linalg.norm(df.qdot), 97686.91574242)
    print(f"Wall time for CalibratedAgent = {elapsed} seconds")
def test_exhaustive_agent(self):
    """Does the exhaustive agent work as expected?

    Runs the exhaustive agent on a DiscreteTwoZoneEngine and checks the
    norms of the resulting trajectories against stored regression values.
    """
    # Initialize engine
    eng = engines.DiscreteTwoZoneEngine(
        nsteps=101,
        mdot=0.1,
        max_minj=2.5e-5,
        fuel="dodecane",
        rxnmech="dodecane_lu_nox.cti",
        ename="Isooctane_MBT_DI_50C_Summ.xlsx",
        reward=rw.Reward(negative_reward=-0.05),
    )
    env = DummyVecEnv([lambda: eng])
    # NOTE: the original pre-built an empty DataFrame here; it was dead code
    # because evaluate_agent returns a fresh DataFrame that replaced it.

    # Initialize the agent
    agent = agents.ExhaustiveAgent(env)
    agent.learn()

    # Evaluate the agent (total reward is not checked here, only df norms)
    t0 = time.time()
    df, _ = utilities.evaluate_agent(env, agent)
    elapsed = time.time() - t0
    utilities.plot_df(env, df, idx=1, name="exhaustive")

    # Test: regression values for the evaluated trajectory
    npt.assert_allclose(np.linalg.norm(df.V), 0.002205916821815495)
    npt.assert_allclose(np.linalg.norm(df.p), 25431213.9403193)
    npt.assert_allclose(np.linalg.norm(df["T"]), 13611.58370927)
    npt.assert_allclose(np.linalg.norm(df.rewards), 101.41373957)
    npt.assert_allclose(np.linalg.norm(df.mdot), 0.1)
    print(f"Wall time for ExhaustiveAgent = {elapsed} seconds")
tensorboard_log=logdir, ) agent.learn( total_timesteps=agent_params["number_episodes"].value * (eng_params["nsteps"].value - 1) * agent_params["nranks"].value, callback=callback, ) elif agent_params["agent"].value == "manual": env = DummyVecEnv([lambda: eng]) agent = agents.ManualAgent(env) agent.learn(agent_params["injection_cas"].value, agent_params["qdot_cas"].value) # Save, evaluate, and plot the agent pfx = os.path.join(logdir, "agent") agent.save(pfx) env = DummyVecEnv([lambda: eng]) df, total_reward = utilities.evaluate_agent(env, agent) df.to_csv(pfx + ".csv", index=False) utilities.plot_df(env, df, idx=0, name=agent_params["agent"].value) utilities.save_plots(pfx + ".pdf") # Plot the training history logs = pd.read_csv(logname) utilities.plot_training(logs, os.path.join(logdir, "logger.pdf")) # output timer end = time.time() - start print(f"Elapsed time {timedelta(seconds=end)} (or {end} seconds)")
def test_equilibrate_engine(self):
    """Does the EquilibrateEngine work as expected?"""
    # Set up the engine and wrap it in a vectorized environment
    eng = engines.EquilibrateEngine(
        nsteps=101,
        Tinj=300.0,
        rxnmech="dodecane_lu_nox.cti",
        mdot=0.1,
        max_minj=5e-5,
        ename="Isooctane_MBT_DI_50C_Summ.xlsx",
        reward=rw.Reward(negative_reward=-0.05),
    )
    env = DummyVecEnv([lambda: eng])

    # Columns to record; dict.fromkeys keeps order while dropping duplicates
    variables = eng.observables + eng.internals + eng.histories
    columns = variables + eng.action.actions + ["rewards"] + eng.reward.get_rewards()
    df = pd.DataFrame(columns=list(dict.fromkeys(columns)))

    # Evaluate a dummy agent that injects at a fixed time
    t0 = time.time()
    step = 0
    obs = env.reset()

    # Record the initial state: no action taken yet, reward of the start state
    df.loc[step, variables] = [eng.current_state[k] for k in variables]
    df.loc[step, eng.action.actions] = 0
    initial_rewards = list(
        eng.reward.compute(eng.current_state, eng.nsteps, False, False).values()
    )
    df.loc[step, eng.reward.get_rewards()] = initial_rewards
    df.loc[step, ["rewards"]] = [sum(initial_rewards)]

    done = False
    while not done:
        step += 1
        # Agent tries to inject twice, but is not allowed the second time
        inject = eng.current_state["ca"] in (-10, 10)
        obs, reward, done, info = env.step([1] if inject else [0])
        state = info[0]["current_state"]
        df.loc[step, variables] = [state[k] for k in variables]
        df.loc[step, eng.action.actions] = eng.action.current
        df.loc[step, ["rewards"]] = reward
        df.loc[step, eng.reward.get_rewards()] = list(info[0]["rewards"].values())

    # Accumulate each reward channel over the episode
    for channel in eng.reward.get_rewards() + ["rewards"]:
        df[f"cumulative_{channel}"] = np.cumsum(df[channel])
    elapsed = time.time() - t0

    utilities.plot_df(env, df, idx=4, name="EQ")

    # Test
    npt.assert_allclose(np.linalg.norm(df.V), 0.002205916821815495)
    npt.assert_allclose(np.linalg.norm(df.p), 35436062.48197973)
    npt.assert_allclose(np.linalg.norm(df["T"]), 12491.93935531)
    npt.assert_allclose(np.linalg.norm(df.rewards), 118.62610333)
    npt.assert_allclose(np.linalg.norm(df.mdot), 0.14142136)
    print(f"Wall time for EquilibrateEngine = {elapsed} seconds")
def test_reactor_engine_with_complex_reward(self):
    """Does the ReactorEngine with complex reward work as expected?"""
    # Initialize engine with a multi-objective reward (work, NOx, soot),
    # each term normalized and weighted; randomize=False keeps it deterministic
    reward = rw.Reward(
        names=["work", "nox", "soot"],
        norms=[1.0, 5e-8, 1e-9],
        weights=[0.34, 0.33, 0.33],
        negative_reward=-100.0,
        randomize=False,
    )
    eng = engines.ReactorEngine(
        nsteps=101,
        Tinj=300.0,
        rxnmech="dodecane_lu_nox.cti",
        mdot=0.1,
        max_minj=5e-5,
        ename="Isooctane_MBT_DI_50C_Summ.xlsx",
        reward=reward,
    )
    env = DummyVecEnv([lambda: eng])
    # Columns to record; dict.fromkeys de-duplicates while preserving order
    variables = eng.observables + eng.internals + eng.histories
    df = pd.DataFrame(
        columns=list(
            dict.fromkeys(
                variables + eng.action.actions + ["rewards"] + eng.reward.get_rewards()
            )
        )
    )

    # Evaluate a dummy agent that injects at a fixed time
    t0 = time.time()
    done = False
    cnt = 0
    obs = env.reset()
    # Row 0: initial state, no action, reward components of the start state
    df.loc[cnt, variables] = [eng.current_state[k] for k in variables]
    df.loc[cnt, eng.action.actions] = 0
    rwd = list(
        eng.reward.compute(eng.current_state, eng.nsteps, False, False).values()
    )
    df.loc[cnt, eng.reward.get_rewards()] = rwd
    df.loc[cnt, ["rewards"]] = [sum(rwd)]
    while not done:
        cnt += 1
        # Agent tries to inject twice, but is not allowed the second time
        action = (
            [1]
            if (eng.current_state["ca"] == 0) or eng.current_state["ca"] == 2
            else [0]
        )
        obs, reward, done, info = env.step(action)
        # Log the post-step state and reward components returned via info
        df.loc[cnt, variables] = [info[0]["current_state"][k] for k in variables]
        df.loc[cnt, eng.action.actions] = eng.action.current
        df.loc[cnt, ["rewards"]] = reward
        df.loc[cnt, eng.reward.get_rewards()] = list(info[0]["rewards"].values())

    # Running totals of each reward channel over the episode
    for rwd in eng.reward.get_rewards() + ["rewards"]:
        df[f"cumulative_{rwd}"] = np.cumsum(df[rwd])
    elapsed = time.time() - t0

    utilities.plot_df(env, df, idx=6, name="reactor")

    # Test: regression values (rtol loosened where chemistry introduces noise)
    npt.assert_allclose(np.linalg.norm(df.V), 0.002205916821815495)
    npt.assert_allclose(np.linalg.norm(df.p), 34254670.52877185, rtol=1e-5)
    npt.assert_allclose(np.linalg.norm(df["T"]), 18668.46491609, rtol=1e-5)
    npt.assert_allclose(np.linalg.norm(df.rewards), 54.47632708, rtol=1e-5)
    npt.assert_allclose(np.linalg.norm(df.r_work), 53.47224436, rtol=1e-5)
    npt.assert_allclose(np.linalg.norm(df.r_nox), 14.10312665, rtol=1e-5)
    npt.assert_allclose(np.linalg.norm(df.w_work), 3.41695771, rtol=1e-5)
    npt.assert_allclose(np.linalg.norm(df.w_nox), 3.31645895, rtol=1e-5)
    npt.assert_allclose(np.linalg.norm(df.w_soot), 3.31645895, rtol=1e-5)
    npt.assert_allclose(np.linalg.norm(df.mdot), 0.14142135623730953)
    print(f"Wall time for ReactorEngine (complex reward) = {elapsed} seconds")
def test_discrete_twozone_engine_with_delay(self):
    """Does the DiscreteTwoZoneEngine with injection delay work as expected?"""
    # Initialize engine; injection_delay blocks a second injection too soon
    # after the first one
    eng = engines.DiscreteTwoZoneEngine(
        nsteps=101,
        fuel="PRF100",
        rxnmech="llnl_gasoline_surrogate_323.xml",
        mdot=0.1,
        max_minj=5e-5,
        injection_delay=0.0025,
        ename="Isooctane_MBT_DI_50C_Summ.xlsx",
        reward=rw.Reward(negative_reward=-101.0),
    )
    env = DummyVecEnv([lambda: eng])
    # Columns to record; dict.fromkeys de-duplicates while preserving order
    variables = eng.observables + eng.internals + eng.histories
    df = pd.DataFrame(
        columns=list(
            dict.fromkeys(
                variables + eng.action.actions + ["rewards"] + eng.reward.get_rewards()
            )
        )
    )

    # Evaluate a dummy agent that injects at a fixed time
    t0 = time.time()
    done = False
    cnt = 0
    obs = env.reset()
    # Row 0: initial state, no action, reward components of the start state
    df.loc[cnt, variables] = [eng.current_state[k] for k in variables]
    df.loc[cnt, eng.action.actions] = 0
    rwd = list(
        eng.reward.compute(eng.current_state, eng.nsteps, False, False).values()
    )
    df.loc[cnt, eng.reward.get_rewards()] = rwd
    df.loc[cnt, ["rewards"]] = [sum(rwd)]
    while not done:
        cnt += 1
        # Agent tries to inject thrice, but is not allowed the second time
        action = (
            [1]
            if (eng.current_state["ca"] == -10)
            or eng.current_state["ca"] == 10
            or eng.current_state["ca"] == 16
            else [0]
        )
        obs, reward, done, info = env.step(action)
        # Log the post-step state and reward components returned via info
        df.loc[cnt, variables] = [info[0]["current_state"][k] for k in variables]
        df.loc[cnt, eng.action.actions] = eng.action.current
        df.loc[cnt, ["rewards"]] = reward
        df.loc[cnt, eng.reward.get_rewards()] = list(info[0]["rewards"].values())

    # Running totals of each reward channel over the episode
    for rwd in eng.reward.get_rewards() + ["rewards"]:
        df[f"cumulative_{rwd}"] = np.cumsum(df[rwd])
    elapsed = time.time() - t0

    utilities.plot_df(env, df, idx=5, name="DiscreteTwoZone (delay)")

    # Test: regression values for the evaluated trajectory
    npt.assert_allclose(np.linalg.norm(df.V), 0.002205916821815495)
    npt.assert_allclose(np.linalg.norm(df.p), 35142241.61422163)
    npt.assert_allclose(np.linalg.norm(df["T"]), 20971.07323643)
    npt.assert_allclose(np.linalg.norm(df.rewards), 153.11736491)
    npt.assert_allclose(np.linalg.norm(df.mdot), 0.14142136)
    print(f"Wall time for DiscreteTwoZoneEngine with delay = {elapsed} seconds")
rxnmech=eng_params["rxnmech"].value, observables=eng_params["observables"].value, twozone_phi=eng_params["twozone_phi"].value, use_qdot=eng_params["use_qdot"].value, ) env = DummyVecEnv([lambda: eng]) agent_params = params.inputs["agent"] if agent_params["agent"].value == "calibrated": agent = agents.CalibratedAgent(env) agent.learn() elif agent_params["agent"].value == "exhaustive": agent = agents.ExhaustiveAgent(env) agent.load(fname, env) elif agent_params["agent"].value == "ppo": agent = PPO2.load(fname, env=env) elif agent_params["agent"].value == "manual": agent = agents.ManualAgent(env=env) agent.load(fname, env) df, total_reward = utilities.evaluate_agent(env, agent) print(f"The total reward for {fname} is {total_reward}.") print(f"The total work for {fname} is {df.work.iloc[-1]}.") if "nox" in df.columns: print(f"The EOC NOx for {fname} is {df.nox.iloc[-1]}.") name = agent_params[ "agent"].value if args.labels is None else args.labels[k] utilities.plot_df(env, df, idx=k, name=name, plot_exp=args.exp) utilities.save_plots("compare.pdf", legends=args.legends)