Example #1
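A unit test of agents.CalibratedAgent: it builds a ContinuousTwoZoneEngine, wraps it in a DummyVecEnv, trains the calibrated agent, evaluates it with utilities.evaluate_agent, and asserts that the norms of the resulting volume, pressure, temperature, reward, mdot, and qdot traces match reference values.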
    def test_calibrated_agent(self):
        """Does the calibrated agent work as expected?"""

        # Initialize engine
        eng = engines.ContinuousTwoZoneEngine(
            nsteps=100,
            use_qdot=True,
            fuel="PRF100",
            rxnmech="llnl_gasoline_surrogate_323.xml",
            ename="Isooctane_MBT_DI_50C_Summ.xlsx",
        )

        # Initialize the agent
        env = DummyVecEnv([lambda: eng])
        agent = agents.CalibratedAgent(env)
        agent.learn()

        # Evaluate the agent
        t0 = time.time()
        df, total_reward = utilities.evaluate_agent(env, agent)
        elapsed = time.time() - t0
        utilities.plot_df(env, df, idx=0, name="calibrated")

        # Test
        npt.assert_allclose(np.linalg.norm(df.V), 0.002195212151)
        npt.assert_allclose(np.linalg.norm(df.p), 22012100.17143623)
        npt.assert_allclose(np.linalg.norm(df["T"]), 14210.47662980)
        npt.assert_allclose(np.linalg.norm(df.rewards), 104.47362155)
        npt.assert_allclose(np.linalg.norm(df.mdot), 0.04144044)
        npt.assert_allclose(np.linalg.norm(df.qdot), 97686.91574242)
        print(f"Wall time for CalibratedAgent = {elapsed} seconds")
Example #2
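A unit test of agents.ExhaustiveAgent on a DiscreteTwoZoneEngine with a capped injected mass (max_minj) and a small negative reward for disallowed actions; the structure mirrors the CalibratedAgent test above.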
    def test_exhaustive_agent(self):
        """Does the exhaustive agent work as expected?"""

        # Initialize engine
        eng = engines.DiscreteTwoZoneEngine(
            nsteps=101,
            mdot=0.1,
            max_minj=2.5e-5,
            fuel="dodecane",
            rxnmech="dodecane_lu_nox.cti",
            ename="Isooctane_MBT_DI_50C_Summ.xlsx",
            reward=rw.Reward(negative_reward=-0.05),
        )
        env = DummyVecEnv([lambda: eng])
        variables = eng.observables + eng.internals + eng.histories
        # Template DataFrame with de-duplicated, order-preserving columns
        # (unused here: evaluate_agent below returns its own DataFrame)
        df = pd.DataFrame(
            columns=list(dict.fromkeys(variables + eng.action.actions + ["rewards"]))
        )

        # Initialize the agent
        agent = agents.ExhaustiveAgent(env)
        agent.learn()

        # Evaluate the agent
        t0 = time.time()
        df, total_reward = utilities.evaluate_agent(env, agent)
        elapsed = time.time() - t0
        utilities.plot_df(env, df, idx=1, name="exhaustive")

        # Test
        npt.assert_allclose(np.linalg.norm(df.V), 0.002205916821815495)
        npt.assert_allclose(np.linalg.norm(df.p), 25431213.9403193)
        npt.assert_allclose(np.linalg.norm(df["T"]), 13611.58370927)
        npt.assert_allclose(np.linalg.norm(df.rewards), 101.41373957)
        npt.assert_allclose(np.linalg.norm(df.mdot), 0.1)
        print(f"Wall time for ExhaustiveAgent = {elapsed} seconds")
Example #3
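A fragment of a training driver: it finishes constructing and training the selected agent (a stable-baselines-style learner configured with a tensorboard log directory, or a ManualAgent driven by the injection_cas and qdot_cas parameters), then saves, evaluates, and plots the agent, plots the training history, and reports the elapsed wall time.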
                tensorboard_log=logdir,
            )
    agent.learn(
        total_timesteps=agent_params["number_episodes"].value
        * (eng_params["nsteps"].value - 1)
        * agent_params["nranks"].value,
        callback=callback,
    )
    elif agent_params["agent"].value == "manual":
        env = DummyVecEnv([lambda: eng])
        agent = agents.ManualAgent(env)
        agent.learn(agent_params["injection_cas"].value,
                    agent_params["qdot_cas"].value)

    # Save, evaluate, and plot the agent
    pfx = os.path.join(logdir, "agent")
    agent.save(pfx)
    env = DummyVecEnv([lambda: eng])
    df, total_reward = utilities.evaluate_agent(env, agent)

    df.to_csv(pfx + ".csv", index=False)
    utilities.plot_df(env, df, idx=0, name=agent_params["agent"].value)
    utilities.save_plots(pfx + ".pdf")

    # Plot the training history
    logs = pd.read_csv(logname)
    utilities.plot_training(logs, os.path.join(logdir, "logger.pdf"))

    # output timer
    end = time.time() - start
    print(f"Elapsed time {timedelta(seconds=end)} (or {end} seconds)")
Example #4
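A unit test of engines.EquilibrateEngine. No learned agent is involved: a hand-rolled loop resets the environment, injects at fixed crank angles (the second attempt, at ca == 10, is expected to be disallowed), records states, actions, and per-component rewards into a DataFrame, accumulates cumulative rewards, and checks trace norms.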
    def test_equilibrate_engine(self):
        """Does the EquilibrateEngine work as expected?"""

        # Initialize engine
        eng = engines.EquilibrateEngine(
            nsteps=101,
            Tinj=300.0,
            rxnmech="dodecane_lu_nox.cti",
            mdot=0.1,
            max_minj=5e-5,
            ename="Isooctane_MBT_DI_50C_Summ.xlsx",
            reward=rw.Reward(negative_reward=-0.05),
        )
        env = DummyVecEnv([lambda: eng])
        variables = eng.observables + eng.internals + eng.histories
        df = pd.DataFrame(
            columns=list(
                dict.fromkeys(
                    variables
                    + eng.action.actions
                    + ["rewards"]
                    + eng.reward.get_rewards()
                )
            )
        )

        # Evaluate a dummy agent that injects at a fixed time
        t0 = time.time()
        done = False
        cnt = 0
        obs = env.reset()
        df.loc[cnt, variables] = [eng.current_state[k] for k in variables]
        df.loc[cnt, eng.action.actions] = 0
        rwd = list(
            eng.reward.compute(eng.current_state, eng.nsteps, False, False).values()
        )
        df.loc[cnt, eng.reward.get_rewards()] = rwd
        df.loc[cnt, ["rewards"]] = [sum(rwd)]

        while not done:
            cnt += 1
            # Agent tries to inject twice, but is not allowed the second time
            action = (
                [1]
                if (eng.current_state["ca"] == -10) or eng.current_state["ca"] == 10
                else [0]
            )
            obs, reward, done, info = env.step(action)
            df.loc[cnt, variables] = [info[0]["current_state"][k] for k in variables]
            df.loc[cnt, eng.action.actions] = eng.action.current
            df.loc[cnt, ["rewards"]] = reward
            df.loc[cnt, eng.reward.get_rewards()] = list(info[0]["rewards"].values())

        for rwd in eng.reward.get_rewards() + ["rewards"]:
            df[f"cumulative_{rwd}"] = np.cumsum(df[rwd])

        elapsed = time.time() - t0

        utilities.plot_df(env, df, idx=4, name="EQ")

        # Test
        npt.assert_allclose(np.linalg.norm(df.V), 0.002205916821815495)
        npt.assert_allclose(np.linalg.norm(df.p), 35436062.48197973)
        npt.assert_allclose(np.linalg.norm(df["T"]), 12491.93935531)
        npt.assert_allclose(np.linalg.norm(df.rewards), 118.62610333)
        npt.assert_allclose(np.linalg.norm(df.mdot), 0.14142136)
        print(f"Wall time for EquilibrateEngine = {elapsed} seconds")
Example #5
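The same dummy-agent evaluation pattern applied to a ReactorEngine with a composite reward over work, NOx, and soot (explicit norms and weights); the assertions also cover the per-component reward (r_work, r_nox) and weight (w_*) columns.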
    def test_reactor_engine_with_complex_reward(self):
        """Does the ReactorEngine with complex reward work as expected?"""

        # Initialize engine
        reward = rw.Reward(
            names=["work", "nox", "soot"],
            norms=[1.0, 5e-8, 1e-9],
            weights=[0.34, 0.33, 0.33],
            negative_reward=-100.0,
            randomize=False,
        )
        eng = engines.ReactorEngine(
            nsteps=101,
            Tinj=300.0,
            rxnmech="dodecane_lu_nox.cti",
            mdot=0.1,
            max_minj=5e-5,
            ename="Isooctane_MBT_DI_50C_Summ.xlsx",
            reward=reward,
        )
        env = DummyVecEnv([lambda: eng])
        variables = eng.observables + eng.internals + eng.histories
        df = pd.DataFrame(
            columns=list(
                dict.fromkeys(
                    variables
                    + eng.action.actions
                    + ["rewards"]
                    + eng.reward.get_rewards()
                )
            )
        )

        # Evaluate a dummy agent that injects at a fixed time
        t0 = time.time()
        done = False
        cnt = 0
        obs = env.reset()
        df.loc[cnt, variables] = [eng.current_state[k] for k in variables]
        df.loc[cnt, eng.action.actions] = 0
        rwd = list(
            eng.reward.compute(eng.current_state, eng.nsteps, False, False).values()
        )
        df.loc[cnt, eng.reward.get_rewards()] = rwd
        df.loc[cnt, ["rewards"]] = [sum(rwd)]

        while not done:
            cnt += 1
            # Agent tries to inject twice, but is not allowed the second time
            action = (
                [1]
                if (eng.current_state["ca"] == 0) or eng.current_state["ca"] == 2
                else [0]
            )
            obs, reward, done, info = env.step(action)
            df.loc[cnt, variables] = [info[0]["current_state"][k] for k in variables]
            df.loc[cnt, eng.action.actions] = eng.action.current
            df.loc[cnt, ["rewards"]] = reward
            df.loc[cnt, eng.reward.get_rewards()] = list(info[0]["rewards"].values())

        for rwd in eng.reward.get_rewards() + ["rewards"]:
            df[f"cumulative_{rwd}"] = np.cumsum(df[rwd])

        elapsed = time.time() - t0

        utilities.plot_df(env, df, idx=6, name="reactor")

        # Test
        npt.assert_allclose(np.linalg.norm(df.V), 0.002205916821815495)
        npt.assert_allclose(np.linalg.norm(df.p), 34254670.52877185, rtol=1e-5)
        npt.assert_allclose(np.linalg.norm(df["T"]), 18668.46491609, rtol=1e-5)
        npt.assert_allclose(np.linalg.norm(df.rewards), 54.47632708, rtol=1e-5)
        npt.assert_allclose(np.linalg.norm(df.r_work), 53.47224436, rtol=1e-5)
        npt.assert_allclose(np.linalg.norm(df.r_nox), 14.10312665, rtol=1e-5)
        npt.assert_allclose(np.linalg.norm(df.w_work), 3.41695771, rtol=1e-5)
        npt.assert_allclose(np.linalg.norm(df.w_nox), 3.31645895, rtol=1e-5)
        npt.assert_allclose(np.linalg.norm(df.w_soot), 3.31645895, rtol=1e-5)
        npt.assert_allclose(np.linalg.norm(df.mdot), 0.14142135623730953)
        print(f"Wall time for ReactorEngine (complex reward) = {elapsed} seconds")
Example #6
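A unit test of DiscreteTwoZoneEngine with an injection delay (injection_delay=0.0025): the dummy agent attempts three injections, and the delay is expected to block the second attempt.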
    def test_discrete_twozone_engine_with_delay(self):
        """Does the DiscreteTwoZoneEngine with injection delay work as expected?"""

        # Initialize engine
        eng = engines.DiscreteTwoZoneEngine(
            nsteps=101,
            fuel="PRF100",
            rxnmech="llnl_gasoline_surrogate_323.xml",
            mdot=0.1,
            max_minj=5e-5,
            injection_delay=0.0025,
            ename="Isooctane_MBT_DI_50C_Summ.xlsx",
            reward=rw.Reward(negative_reward=-101.0),
        )
        env = DummyVecEnv([lambda: eng])
        variables = eng.observables + eng.internals + eng.histories
        df = pd.DataFrame(
            columns=list(
                dict.fromkeys(
                    variables
                    + eng.action.actions
                    + ["rewards"]
                    + eng.reward.get_rewards()
                )
            )
        )

        # Evaluate a dummy agent that injects at a fixed time
        t0 = time.time()
        done = False
        cnt = 0
        obs = env.reset()
        df.loc[cnt, variables] = [eng.current_state[k] for k in variables]
        df.loc[cnt, eng.action.actions] = 0
        rwd = list(
            eng.reward.compute(eng.current_state, eng.nsteps, False, False).values()
        )
        df.loc[cnt, eng.reward.get_rewards()] = rwd
        df.loc[cnt, ["rewards"]] = [sum(rwd)]

        while not done:
            cnt += 1
            # Agent tries to inject three times; the injection delay
            # disallows the second attempt
            action = (
                [1]
                if (eng.current_state["ca"] == -10)
                or eng.current_state["ca"] == 10
                or eng.current_state["ca"] == 16
                else [0]
            )
            obs, reward, done, info = env.step(action)
            df.loc[cnt, variables] = [info[0]["current_state"][k] for k in variables]
            df.loc[cnt, eng.action.actions] = eng.action.current
            df.loc[cnt, ["rewards"]] = reward
            df.loc[cnt, eng.reward.get_rewards()] = list(info[0]["rewards"].values())

        for rwd in eng.reward.get_rewards() + ["rewards"]:
            df[f"cumulative_{rwd}"] = np.cumsum(df[rwd])

        elapsed = time.time() - t0

        utilities.plot_df(env, df, idx=5, name="DiscreteTwoZone (delay)")

        # Test
        npt.assert_allclose(np.linalg.norm(df.V), 0.002205916821815495)
        npt.assert_allclose(np.linalg.norm(df.p), 35142241.61422163)
        npt.assert_allclose(np.linalg.norm(df["T"]), 20971.07323643)
        npt.assert_allclose(np.linalg.norm(df.rewards), 153.11736491)
        npt.assert_allclose(np.linalg.norm(df.mdot), 0.14142136)
        print(f"Wall time for DiscreteTwoZoneEngine with delay = {elapsed} seconds")
Example #7
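A fragment of a comparison script: for each saved agent file fname it rebuilds the environment, loads the matching agent type (calibrated, exhaustive, PPO2, or manual), evaluates it, prints the total reward, total work, and end-of-cycle NOx (when available), and collects labeled plots into compare.pdf.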
                    rxnmech=eng_params["rxnmech"].value,
                    observables=eng_params["observables"].value,
                    twozone_phi=eng_params["twozone_phi"].value,
                    use_qdot=eng_params["use_qdot"].value,
                )

        env = DummyVecEnv([lambda: eng])
        agent_params = params.inputs["agent"]
        if agent_params["agent"].value == "calibrated":
            agent = agents.CalibratedAgent(env)
            agent.learn()
        elif agent_params["agent"].value == "exhaustive":
            agent = agents.ExhaustiveAgent(env)
            agent.load(fname, env)
        elif agent_params["agent"].value == "ppo":
            agent = PPO2.load(fname, env=env)
        elif agent_params["agent"].value == "manual":
            agent = agents.ManualAgent(env=env)
            agent.load(fname, env)

        df, total_reward = utilities.evaluate_agent(env, agent)
        print(f"The total reward for {fname} is {total_reward}.")
        print(f"The total work for {fname} is {df.work.iloc[-1]}.")
        if "nox" in df.columns:
            print(f"The EOC NOx for {fname} is {df.nox.iloc[-1]}.")
        name = (
            agent_params["agent"].value if args.labels is None else args.labels[k]
        )
        utilities.plot_df(env, df, idx=k, name=name, plot_exp=args.exp)

    utilities.save_plots("compare.pdf", legends=args.legends)