def test_reactor_engine_copies(self):
    """Do deepcopy, pickling and repr of ReactorEngine work as expected?"""
    # Build a representative engine, then exercise the copy/serialize paths
    engine = engines.ReactorEngine(
        nsteps=101,
        Tinj=300.0,
        rxnmech="dodecane_lu_nox.cti",
        mdot=0.1,
        max_minj=5e-5,
        reward=rw.Reward(negative_reward=-0.05),
    )
    assert_deepcopy_pickle_repr(engine)
def test_exhaustive_agent(self):
    """Does the exhaustive agent work as expected?

    Fix: removed the dead construction of an empty ``pd.DataFrame`` (and the
    ``variables`` list that only fed it) — ``df`` was unconditionally
    reassigned from ``utilities.evaluate_agent`` before any use.
    """
    # Initialize engine
    eng = engines.DiscreteTwoZoneEngine(
        nsteps=101,
        mdot=0.1,
        max_minj=2.5e-5,
        fuel="dodecane",
        rxnmech="dodecane_lu_nox.cti",
        ename="Isooctane_MBT_DI_50C_Summ.xlsx",
        reward=rw.Reward(negative_reward=-0.05),
    )
    env = DummyVecEnv([lambda: eng])

    # Initialize the agent
    agent = agents.ExhaustiveAgent(env)
    agent.learn()

    # Evaluate the agent (evaluate_agent builds and returns the dataframe)
    t0 = time.time()
    df, total_reward = utilities.evaluate_agent(env, agent)
    elapsed = time.time() - t0
    utilities.plot_df(env, df, idx=1, name="exhaustive")

    # Test
    npt.assert_allclose(np.linalg.norm(df.V), 0.002205916821815495)
    npt.assert_allclose(np.linalg.norm(df.p), 25431213.9403193)
    npt.assert_allclose(np.linalg.norm(df["T"]), 13611.58370927)
    npt.assert_allclose(np.linalg.norm(df.rewards), 101.41373957)
    npt.assert_allclose(np.linalg.norm(df.mdot), 0.1)
    print(f"Wall time for ExhaustiveAgent = {elapsed} seconds")
def __init__(
    self,
    nsteps=100,
    ivc=-100.0,
    evo=100.0,
    fuel="dodecane",
    rxnmech="dodecane_lu_nox.cti",
    max_pressure=200.0,
    ename="Scorpion.xlsx",
    reward=None,
    use_qdot=False,
):
    """Initialize Engine

    :param nsteps: number of steps
    :type nsteps: int
    :param ivc: crank angle of intake valve closed
    :type ivc: float
    :param evo: crank angle of exhaust valve open
    :type evo: float
    :param fuel: fuel type
    :type fuel: str
    :param rxnmech: mechanism file
    :type rxnmech: str
    :param max_pressure: maximum pressure allowed in engine (atm)
    :type max_pressure: float
    :param ename: file describing the engine
    :type ename: str
    :param reward: reward (defaults to a fresh ``rw.Reward()`` per instance)
    :type reward: Reward()
    :param use_qdot: bool to use Qdot as an action
    :type use_qdot: bool
    :returns: Engine
    :rtype: Engine()
    """
    super(Engine, self).__init__()

    # Fix: the previous default ``reward=rw.Reward()`` was a mutable default
    # argument, evaluated once at definition time and shared by every engine
    # that omitted ``reward`` (so per-instance reward state leaked between
    # engines). Use a None sentinel and build a fresh Reward here instead.
    if reward is None:
        reward = rw.Reward()

    # Engine parameters
    self.T0, self.p0 = calibrated_engine_ic()
    self.nsteps = nsteps
    self.ivc = ivc
    self.evo = evo
    self.fuel = fuel
    self.rxnmech = rxnmech
    self.small_mass = 1.0e-15
    self.max_burned_mass = 6e-3
    self.max_pressure = max_pressure
    self.ename = ename
    self.reward = reward
    self.returns = {k: 0.0 for k in self.reward.names}
    self.rewards = {k: 0.0 for k in self.reward.names}
    self.nepisode = 0
    self.action = None
    self.state_updater = {}
    self.state_reseter = {}
    self.datadir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "datafiles"
    )
    self.use_qdot = use_qdot
    self.qdot_value = 11250  # hardcode for now

    # Bounds and normalization scales for each observable exposed to the agent
    self.observable_attributes = {
        "ca": {
            "low": self.ivc,
            "high": self.evo,
            "scale": 0.5 * (self.evo - self.ivc),
        },
        "p": {
            "low": 0.0,
            "high": np.finfo(np.float32).max,
            "scale": ct.one_atm * 100,
        },
        "T": {"low": 0.0, "high": np.finfo(np.float32).max, "scale": 2000},
        "attempt_ninj": {"low": 0.0, "high": np.iinfo(np.int32).max, "scale": 1.0},
        "success_ninj": {"low": 0.0, "high": np.iinfo(np.int32).max, "scale": 1.0},
        "can_inject": {"low": 0, "high": 1, "scale": 1},
        "nox": {"low": 0.0, "high": np.finfo(np.float32).max, "scale": 1.0},
        "soot": {"low": 0.0, "high": np.finfo(np.float32).max, "scale": 1.0},
    }
    self.observable_attributes.update(self.reward.get_observable_attributes())
aname = os.path.join(logdir, "actions.npz") if os.path.exists(aname): os.remove(aname) repo = git.Repo(os.path.abspath(__file__), search_parent_directories=True) with open(os.path.join(logdir, "hash.txt"), "w") as f: f.write(f"hash: {repo.head.object.hexsha}\n") pickle.dump(args, open(os.path.join(logdir, "args.pkl"), "wb")) best_reward = -np.inf # Initialize the reward rwd_params = params.inputs["reward"] reward = rw.Reward( names=rwd_params["names"].value, norms=rwd_params["norms"].value, weights=rwd_params["weights"].value, negative_reward=rwd_params["negative_reward"].value, EOC_reward=rwd_params["EOC_reward"].value, randomize=rwd_params["randomize"].value, random_updates=rwd_params["random_updates"].value, ) # Initialize the engine eng_params = params.inputs["engine"] if eng_params["engine"].value == "reactor-engine": eng = engines.ReactorEngine( Tinj=eng_params["Tinj"].value, nsteps=eng_params["nsteps"].value, mdot=eng_params["mdot"].value, max_minj=eng_params["max_minj"].value, injection_delay=eng_params["injection_delay"].value, max_pressure=eng_params["max_pressure"].value,
def test_equilibrate_engine(self):
    """Does the EquilibrateEngine work as expected?"""
    # Set up the engine and wrap it in a vectorized environment
    eng = engines.EquilibrateEngine(
        nsteps=101,
        Tinj=300.0,
        rxnmech="dodecane_lu_nox.cti",
        mdot=0.1,
        max_minj=5e-5,
        ename="Isooctane_MBT_DI_50C_Summ.xlsx",
        reward=rw.Reward(negative_reward=-0.05),
    )
    env = DummyVecEnv([lambda: eng])
    variables = eng.observables + eng.internals + eng.histories
    columns = list(
        dict.fromkeys(
            variables + eng.action.actions + ["rewards"] + eng.reward.get_rewards()
        )
    )
    df = pd.DataFrame(columns=columns)

    # Evaluate a dummy agent that injects at a fixed time
    t0 = time.time()
    done = False
    step = 0
    obs = env.reset()
    df.loc[step, variables] = [eng.current_state[k] for k in variables]
    df.loc[step, eng.action.actions] = 0
    initial_rwd = list(
        eng.reward.compute(eng.current_state, eng.nsteps, False, False).values()
    )
    df.loc[step, eng.reward.get_rewards()] = initial_rwd
    df.loc[step, ["rewards"]] = [sum(initial_rwd)]

    while not done:
        step += 1
        # Agent tries to inject twice, but is not allowed the second time
        inject = eng.current_state["ca"] in (-10, 10)
        obs, reward, done, info = env.step([1] if inject else [0])
        df.loc[step, variables] = [info[0]["current_state"][k] for k in variables]
        df.loc[step, eng.action.actions] = eng.action.current
        df.loc[step, ["rewards"]] = reward
        df.loc[step, eng.reward.get_rewards()] = list(info[0]["rewards"].values())

    for col in eng.reward.get_rewards() + ["rewards"]:
        df[f"cumulative_{col}"] = np.cumsum(df[col])
    elapsed = time.time() - t0

    utilities.plot_df(env, df, idx=4, name="EQ")

    # Test
    npt.assert_allclose(np.linalg.norm(df.V), 0.002205916821815495)
    npt.assert_allclose(np.linalg.norm(df.p), 35436062.48197973)
    npt.assert_allclose(np.linalg.norm(df["T"]), 12491.93935531)
    npt.assert_allclose(np.linalg.norm(df.rewards), 118.62610333)
    npt.assert_allclose(np.linalg.norm(df.mdot), 0.14142136)
    print(f"Wall time for EquilibrateEngine = {elapsed} seconds")
def test_reactor_engine_with_complex_reward(self):
    """Does the ReactorEngine with complex reward work as expected?"""
    # Multi-objective reward: work, NOx, and soot with fixed weights
    reward = rw.Reward(
        names=["work", "nox", "soot"],
        norms=[1.0, 5e-8, 1e-9],
        weights=[0.34, 0.33, 0.33],
        negative_reward=-100.0,
        randomize=False,
    )
    eng = engines.ReactorEngine(
        nsteps=101,
        Tinj=300.0,
        rxnmech="dodecane_lu_nox.cti",
        mdot=0.1,
        max_minj=5e-5,
        ename="Isooctane_MBT_DI_50C_Summ.xlsx",
        reward=reward,
    )
    env = DummyVecEnv([lambda: eng])
    variables = eng.observables + eng.internals + eng.histories
    columns = list(
        dict.fromkeys(
            variables + eng.action.actions + ["rewards"] + eng.reward.get_rewards()
        )
    )
    df = pd.DataFrame(columns=columns)

    # Evaluate a dummy agent that injects at a fixed time
    t0 = time.time()
    done = False
    step = 0
    obs = env.reset()
    df.loc[step, variables] = [eng.current_state[k] for k in variables]
    df.loc[step, eng.action.actions] = 0
    initial_rwd = list(
        eng.reward.compute(eng.current_state, eng.nsteps, False, False).values()
    )
    df.loc[step, eng.reward.get_rewards()] = initial_rwd
    df.loc[step, ["rewards"]] = [sum(initial_rwd)]

    while not done:
        step += 1
        # Agent tries to inject twice, but is not allowed the second time
        inject = eng.current_state["ca"] in (0, 2)
        obs, reward, done, info = env.step([1] if inject else [0])
        df.loc[step, variables] = [info[0]["current_state"][k] for k in variables]
        df.loc[step, eng.action.actions] = eng.action.current
        df.loc[step, ["rewards"]] = reward
        df.loc[step, eng.reward.get_rewards()] = list(info[0]["rewards"].values())

    for col in eng.reward.get_rewards() + ["rewards"]:
        df[f"cumulative_{col}"] = np.cumsum(df[col])
    elapsed = time.time() - t0

    utilities.plot_df(env, df, idx=6, name="reactor")

    # Test
    npt.assert_allclose(np.linalg.norm(df.V), 0.002205916821815495)
    npt.assert_allclose(np.linalg.norm(df.p), 34254670.52877185, rtol=1e-5)
    npt.assert_allclose(np.linalg.norm(df["T"]), 18668.46491609, rtol=1e-5)
    npt.assert_allclose(np.linalg.norm(df.rewards), 54.47632708, rtol=1e-5)
    npt.assert_allclose(np.linalg.norm(df.r_work), 53.47224436, rtol=1e-5)
    npt.assert_allclose(np.linalg.norm(df.r_nox), 14.10312665, rtol=1e-5)
    npt.assert_allclose(np.linalg.norm(df.w_work), 3.41695771, rtol=1e-5)
    npt.assert_allclose(np.linalg.norm(df.w_nox), 3.31645895, rtol=1e-5)
    npt.assert_allclose(np.linalg.norm(df.w_soot), 3.31645895, rtol=1e-5)
    npt.assert_allclose(np.linalg.norm(df.mdot), 0.14142135623730953)
    print(f"Wall time for ReactorEngine (complex reward) = {elapsed} seconds")
def test_discrete_twozone_engine_with_delay(self):
    """Does the DiscreteTwoZoneEngine with injection delay work as expected?"""
    # Set up an engine with a non-zero injection delay
    eng = engines.DiscreteTwoZoneEngine(
        nsteps=101,
        fuel="PRF100",
        rxnmech="llnl_gasoline_surrogate_323.xml",
        mdot=0.1,
        max_minj=5e-5,
        injection_delay=0.0025,
        ename="Isooctane_MBT_DI_50C_Summ.xlsx",
        reward=rw.Reward(negative_reward=-101.0),
    )
    env = DummyVecEnv([lambda: eng])
    variables = eng.observables + eng.internals + eng.histories
    columns = list(
        dict.fromkeys(
            variables + eng.action.actions + ["rewards"] + eng.reward.get_rewards()
        )
    )
    df = pd.DataFrame(columns=columns)

    # Evaluate a dummy agent that injects at a fixed time
    t0 = time.time()
    done = False
    step = 0
    obs = env.reset()
    df.loc[step, variables] = [eng.current_state[k] for k in variables]
    df.loc[step, eng.action.actions] = 0
    initial_rwd = list(
        eng.reward.compute(eng.current_state, eng.nsteps, False, False).values()
    )
    df.loc[step, eng.reward.get_rewards()] = initial_rwd
    df.loc[step, ["rewards"]] = [sum(initial_rwd)]

    while not done:
        step += 1
        # Agent tries to inject thrice, but is not allowed the second time
        inject = eng.current_state["ca"] in (-10, 10, 16)
        obs, reward, done, info = env.step([1] if inject else [0])
        df.loc[step, variables] = [info[0]["current_state"][k] for k in variables]
        df.loc[step, eng.action.actions] = eng.action.current
        df.loc[step, ["rewards"]] = reward
        df.loc[step, eng.reward.get_rewards()] = list(info[0]["rewards"].values())

    for col in eng.reward.get_rewards() + ["rewards"]:
        df[f"cumulative_{col}"] = np.cumsum(df[col])
    elapsed = time.time() - t0

    utilities.plot_df(env, df, idx=5, name="DiscreteTwoZone (delay)")

    # Test
    npt.assert_allclose(np.linalg.norm(df.V), 0.002205916821815495)
    npt.assert_allclose(np.linalg.norm(df.p), 35142241.61422163)
    npt.assert_allclose(np.linalg.norm(df["T"]), 20971.07323643)
    npt.assert_allclose(np.linalg.norm(df.rewards), 153.11736491)
    npt.assert_allclose(np.linalg.norm(df.mdot), 0.14142136)
    print(f"Wall time for DiscreteTwoZoneEngine with delay = {elapsed} seconds")