def test_distribution_with_rebalancing_integration():
    """
    Integration smoke test: build the distribution with rebalancing example environment, drive it
    with the local priority agent and run the simulator for 1000 steps. Nothing is asserted on the
    simulation output; the test only checks that the run completes.
    """
    num_simulation_steps = 1000
    stock_levels = np.array([0, 0, 10, 0, 0, 0, 0, 0, 10])[:, None]
    safety_stock = np.array([10, 5, 0, 10, 5, 0, 10, 5, 0])[:, None]
    # NOTE(review): `stock_levels` already carries a trailing axis, so this second `[:, None]`
    # produces a 3-D array. Kept exactly as in the original -- confirm that this is intended.
    env = get_distribution_with_rebalancing_example_env(np.array(stock_levels)[:, None])
    agent = dwr_agent.DistributionWithRebalancingLocalPriorityAgent(env, safety_stock)
    ps.SncSimulator(env, agent, discount_factor=0.95).run(
        num_simulation_steps=num_simulation_steps)
def run_simulations(
        num_sim: int,
        num_sim_steps: int,
        env: crw.ControlledRandomWalk,
        discount_factor: float
) -> Tuple[List[snc_types.ActionSpace], List[snc_types.StateSpace]]:
    """
    Simulate a Big Step Hedgehog agent on the given environment several times and collect the
    actions and states reported by every run.

    :param num_sim: Number of simulations to run.
    :param num_sim_steps: Number of simulation steps to run in each simulation.
    :param env: The environment to be stepped through.
    :param discount_factor: Discount factor used to compute the long term cost function.
    :return: (data_actions, data_states):
        - data_actions: The "action" entries of every simulation, appended in run order.
        - data_states: The "state" entries of every simulation, appended in run order.
    """
    # A single progress bar is shared by all the runs, hence it is sized for the total step count.
    progress_handlers = [
        ProgressBarHandler(num_simulation_steps=num_sim * num_sim_steps, trigger_frequency=1)
    ]  # type: List[Handler]
    reporter = rep.Reporter(handlers=progress_handlers)

    # Agent hyperparameters, without any override.
    overrides: Dict[str, Dict[str, Union[str, float]]] = {}
    hyperparams = load_agents.get_hedgehog_hyperparams(**overrides)
    ac_params, wk_params, si_params, po_params, hh_params, name, include_hedging = hyperparams

    data_actions = []  # type: List[snc_types.ActionSpace]
    data_states = []  # type: List[snc_types.StateSpace]
    for sim_index in np.arange(num_sim):
        # Each run gets its own seed: 42, 43, ...
        job_gen_seed = 42 + int(sim_index)
        np.random.seed(job_gen_seed)

        # A fresh agent and simulator are created for every run.
        agent = BigStepHedgehogAgent(env, discount_factor, wk_params, hh_params, ac_params,
                                     si_params, po_params, include_hedging, name)
        simulator = ps.SncSimulator(env, agent, discount_factor=discount_factor)

        run_data = simulator.run(num_simulation_steps=num_sim_steps, reporter=reporter,
                                 job_gen_seed=job_gen_seed)
        data_actions.extend(run_data["action"])
        data_states.extend(run_data["state"])

    return data_actions, data_states
def build_default_simple_reentrant_line_simulator(seed):
    """
    Helper function that returns a simulator to be used by the tests below.

    The simulator wraps the simple re-entrant line model and a Big Step Hedgehog agent built from
    the hyperparameters returned by `load_agents.get_hedgehog_hyperparams` without overrides.
    Both the agent and the simulator use a discount factor of 0.95.

    :param seed: Value passed to the environment as `job_gen_seed`.
    :return: The `ps.SncSimulator` built from the environment and the agent.
    """
    discount_factor = 0.95
    env = examples.simple_reentrant_line_model(job_gen_seed=seed)

    hyperparams = load_agents.get_hedgehog_hyperparams()
    ac_params, wk_params, si_params, po_params, hh_params, si_class, dp_params, name = hyperparams

    # Create the agent and hand it to the policy simulator.
    hedgehog_agent = BigStepHedgehogAgent(
        env, discount_factor, wk_params, hh_params, ac_params, si_params, po_params, si_class,
        dp_params, name
    )
    return ps.SncSimulator(env, hedgehog_agent, discount_factor=discount_factor)
def test_integration_double_reentrant_line_model(self):
    """
    Integration test: run a MaxWeight agent on the double re-entrant line model for 1000 steps,
    starting from 50 in each of the 5 buffers, and check that the last entry of the recorded
    state data is strictly below the initial state in every component.
    """
    seed = 42
    np.random.seed(seed)

    num_buffers = 5
    initial_state = 50 * np.ones((num_buffers, 1))
    env_kwargs = dict(
        alpha=1,
        mu1=4,
        mu2=3,
        mu3=2,
        mu4=3,
        mu5=4,
        cost_per_buffer=np.array([1, 1, 1, 1, 1])[:, None],
        initial_state=initial_state,
        capacity=np.full((num_buffers, 1), np.inf),  # Unbounded buffers.
        job_conservation_flag=True,
        job_gen_seed=seed,
        max_episode_length=None,
    )
    env = examples.double_reentrant_line_model(**env_kwargs)

    simulator = ps.SncSimulator(env, mw.MaxWeightAgent(env), discount_factor=0.95)
    data_mw = simulator.run(num_simulation_steps=1000)

    final_state = data_mw['state'][-1]
    assert np.all(final_state < initial_state)
def test_scenario(scenario_name, agent_class):
    """
    Run a brief integration test on a given scenario.

    The scenario is loaded with seed `SEED_NO`, a Hedgehog agent of the requested class is built
    with hyperparameters shrunk for a quick run, and the simulator is run for `SIM_STEPS` steps.

    :param scenario_name: Name of the scenario to load. The test is skipped when the name is
        listed in SKIPPED_TESTS, PULL_MODELS or PUSH_PULL_MODELS.
    :param agent_class: Agent class to instantiate, either BigStepHedgehogAgent or
        BigStepHedgehogGTOAgent.
    :raises AssertionError: If `agent_class` is not one of the two recognised agent classes.
    """
    if scenario_name in SKIPPED_TESTS + PULL_MODELS + PUSH_PULL_MODELS:
        pytest.skip()

    np.random.seed(SEED_NO)
    _, env = scenarios.load_scenario(scenario_name, SEED_NO)

    # Update parameters for quick tests.
    overrides = {
        "HedgehogHyperParams": {
            "theta_0": 0.5,
            "horizon_drain_time_ratio": 0.1,
            "horizon_mpc_ratio": 0.1,
            "minimum_horizon": 10
        },
        "AsymptoticCovarianceParams": {
            "num_presimulation_steps": 100,
            "num_batch": 20
        }
    }
    if scenario_name in MIP_REQUIRED_MODELS:
        overrides["HedgehogHyperParams"]["mpc_policy_class_name"] = "FeedbackMipFeasibleMpcPolicy"

    ac_params, wk_params, si_params, po_params, hh_params, si_class, dp_params, name \
        = load_agents.get_hedgehog_hyperparams(**overrides)

    discount_factor = 0.95
    if agent_class is BigStepHedgehogAgent:
        agent = agent_class(env, discount_factor, wk_params, hh_params, ac_params, si_params,
                            po_params, si_class, dp_params, name)
    elif agent_class is BigStepHedgehogGTOAgent:
        # The GTO agent takes neither the strategic idling parameters nor the class.
        agent = agent_class(env, discount_factor, wk_params, hh_params, ac_params, po_params,
                            dp_params, name)
    else:
        # Raised explicitly instead of `assert False`, which is removed when running with `-O`
        # and would let the code below fail on the unbound `agent` instead.
        raise AssertionError(f"Not recognised agent: {agent_class}")

    simulator = ps.SncSimulator(env, agent, discount_factor=discount_factor)
    simulator.run(num_simulation_steps=SIM_STEPS)
def run_validation(arguments: argparse.Namespace) -> Dict[str, str]:
    """
    Run the validation on a particular scenario.

    The scenario named by `arguments.env_name` is loaded once. Then, for every agent name in
    `arguments.agents`, the environment is reset, the agent is built with the arguments that
    correspond to its kind, and `run_policy` is called with `{arguments.logdir}/{name_alias}` as
    save location. Unless `arguments.server_mode` is set, the matplotlib figures are shown at the
    end.

    Note that `arguments.hedgehog_param_overrides` and `arguments.maxweight_param_overrides` are
    replaced in place by an empty dictionary when they are None and the matching agent is run.

    :param arguments: Namespace of experiment parameters.
    :return: Dictionary mapping the name of each simulated agent to its save location.
    :raises ValueError: If a PPO agent is requested but `arguments.rl_agent_params` is empty.
    """
    assert arguments.env_param_overrides['job_gen_seed'] is not None
    assert arguments.seed is not None
    # Note that if job_gen_seed was not in env_param_overrides, then at this point we will have:
    # arguments.env_param_overrides['job_gen_seed'] == arguments.seed.
    job_gen_seed = arguments.env_param_overrides['job_gen_seed']
    # Derive distinct seeds for each source of randomness from the single experiment seed.
    global_seed = arguments.seed + 100
    agent_seed = arguments.seed + 200
    mpc_seed = arguments.seed + 300
    np.random.seed(global_seed)
    print(f"job_gen_seed {job_gen_seed}")
    print(f"global_seed {global_seed}")
    print(f"agent_seed {agent_seed}")
    print(f"mpc_seed {mpc_seed}")

    save_locations: Dict[str, str] = {}

    # Get Scenario
    _, env = scenarios.load_scenario(arguments.env_name, job_gen_seed,
                                     arguments.env_param_overrides)

    # Initialise an agent counter to ensure that the right checkpoint is loaded for each agent.
    rl_agent_count = 0
    for agent_name in arguments.agents:
        env.reset_with_random_state(job_gen_seed)
        agent_args = {}
        name_alias = agent_name  # Set name of folder storing results to agent_name by default.
        if agent_name in load_agents.HEDGEHOG_AGENTS:
            if arguments.hedgehog_param_overrides is None:
                arguments.hedgehog_param_overrides = dict()
            agent_args['hh_overrides'] = arguments.hedgehog_param_overrides
            agent_args['discount_factor'] = arguments.discount_factor
            agent_args['debug_info'] = arguments.debug_info
            agent_args['agent_seed'] = agent_seed
            agent_args['mpc_seed'] = mpc_seed
            # Replace directory name if name passed as an agent parameter.
            name_alias = arguments.hedgehog_param_overrides.get('name', agent_name)
        elif agent_name == 'distribution_with_rebalancing_heuristic':
            agent_args['safety_stocks'] = 20 * np.ones(env.state.shape)
        elif agent_name in ['reinforce', 'ppo']:
            agent_args['discount_factor'] = arguments.discount_factor
            if arguments.rl_agent_params:
                # Update agent_args accordingly.
                if rl_agent_count < len(arguments.rl_agent_params):
                    if 'discount_factor' in arguments.rl_agent_params[rl_agent_count]:
                        # The second literal must be an f-string, otherwise the placeholder
                        # `{agent_name}` is emitted verbatim in the warning.
                        warn('WARNING: Overriding provided discount factor with agent specific '
                             f'discount factor for {agent_name} agent')
                    agent_args.update(arguments.rl_agent_params[rl_agent_count])
            else:
                if agent_name == "ppo":
                    raise ValueError(
                        "When running a PPO agent you must provide agent parameters.")
                else:
                    warn("REINFORCE agent being run default agent parameters.")
            agent_args['rl_checkpoint'] = arguments.rl_checkpoints[rl_agent_count]
            rl_agent_count += 1
        elif agent_name in ('maxweight', 'scheduling_maxweight'):
            if arguments.maxweight_param_overrides is None:
                arguments.maxweight_param_overrides = dict()
            agent_args['overrides'] = arguments.maxweight_param_overrides
            agent_args['agent_seed'] = agent_seed
            agent_args['mpc_seed'] = mpc_seed
            # Replace directory name if name passed as an agent parameter.
            name_alias = arguments.maxweight_param_overrides.get('name', agent_name)
        else:
            agent_args['agent_seed'] = agent_seed

        agent = load_agents.get_agent(agent_name, env, **agent_args)
        # All the experiment parameters are forwarded to the simulator as keyword arguments.
        sim = ps.SncSimulator(env, agent, **vars(arguments))

        print(f'\nSimulating {agent.name}...')
        validation_utils.print_agent_params(agent)

        is_hedgehog = isinstance(agent, (BigStepHedgehogAgent,
                                         PureFeedbackStationaryHedgehogAgent,
                                         PureFeedbackMIPHedgehogAgent))
        save_location = f'{arguments.logdir}/{name_alias}'
        run_policy(sim, arguments.num_steps, arguments.server_mode, is_hedgehog, save_location,
                   job_gen_seed)

        if is_hedgehog:
            # Repeats the check behind `is_hedgehog`, presumably so that static type checkers
            # narrow the type of `agent` before `workload_tuple` is accessed.
            assert isinstance(agent, (BigStepHedgehogAgent,
                                      PureFeedbackStationaryHedgehogAgent,
                                      PureFeedbackMIPHedgehogAgent))
            validation_utils.print_workload_to_physical_resources_indexes(
                agent.workload_tuple.nu)

        save_locations[agent.name] = save_location
        print(f'Data stored at: {save_location}.')
        print(f'Finished simulating {agent.name}.\n')

    print(f"job_gen_seed: {arguments.env_param_overrides.get('job_gen_seed')}")
    print("End of simulation!")
    if not arguments.server_mode:
        plt.ioff()
        plt.show()
    return save_locations