def test_distribution_with_rebalancing_integration():
    """
    Smoke-test the local-priority rebalancing agent on the example environment.

    The test contains no assertions: it passes as long as building the environment, the agent
    and the simulator, and running 1000 simulation steps, does not raise.
    """
    # Both vectors are built with shape (9, 1).
    start_levels = np.array([0, 0, 10, 0, 0, 0, 0, 0, 10])[:, None]
    stock_targets = np.array([10, 5, 0, 10, 5, 0, 10, 5, 0])[:, None]

    # NOTE(review): start_levels is already a column, so the extra axis added below yields a
    # three-dimensional array -- confirm that the helper expects this.
    env = get_distribution_with_rebalancing_example_env(
        np.array(start_levels)[:, None])
    agent = dwr_agent.DistributionWithRebalancingLocalPriorityAgent(
        env, stock_targets)

    ps.SncSimulator(env, agent, discount_factor=0.95).run(num_simulation_steps=1000)
# Example no. 2
# 0
def run_simulations(
    num_sim: int, num_sim_steps: int, env: crw.ControlledRandomWalk,
    discount_factor: float
) -> Tuple[List[snc_types.ActionSpace], List[snc_types.StateSpace]]:
    """
    Simulate a Big Step Hedgehog agent several times on one model and pool the results.

    Simulation number `i` (counting from zero) uses the seed `42 + i`, both for NumPy's global
    random state and as the `job_gen_seed` handed to the simulator.

    :param num_sim: The number of simulations to run.
    :param num_sim_steps: The number of simulation steps to run for each simulation.
    :param env: The environment to be stepped through.
    :param discount_factor: Discount factor used to compute the long term cost function.
    :return: Tuple (actions, states) holding the "action" and "state" entries of every
        simulation, concatenated in the order the simulations were run.
    """
    # A single progress-bar handler sized for the total step count of all simulations.
    total_steps = num_sim * num_sim_steps
    progress_handlers: List[Handler] = [
        ProgressBarHandler(num_simulation_steps=total_steps, trigger_frequency=1)
    ]
    reporter = rep.Reporter(handlers=progress_handlers)

    # Agent hyperparameters, loaded with nothing overridden.
    overrides: Dict[str, Dict[str, Union[str, float]]] = {}
    (ac_params, wk_params, si_params, po_params, hh_params, name,
     include_hedging) = load_agents.get_hedgehog_hyperparams(**overrides)

    all_actions: List[snc_types.ActionSpace] = []
    all_states: List[snc_types.StateSpace] = []
    for run_index in np.arange(num_sim):
        run_seed = int(42 + run_index)
        np.random.seed(run_seed)

        # A new agent and a new simulator are built for every run; the environment is shared.
        hedgehog = BigStepHedgehogAgent(env, discount_factor, wk_params,
                                        hh_params, ac_params, si_params,
                                        po_params, include_hedging, name)
        run_data = ps.SncSimulator(
            env, hedgehog, discount_factor=discount_factor
        ).run(num_simulation_steps=num_sim_steps,
              reporter=reporter,
              job_gen_seed=run_seed)

        all_actions.extend(run_data["action"])
        all_states.extend(run_data["state"])
    return all_actions, all_states
def build_default_simple_reentrant_line_simulator(seed):
    """
    Build a simulator that runs a Big Step Hedgehog agent on the simple re-entrant line model.

    Helper used by the tests below. The hyperparameters are loaded without any overrides, and
    both the agent and the simulator use a discount factor of 0.95.

    :param seed: Value passed to the environment constructor as `job_gen_seed`.
    :return: The `ps.SncSimulator` built from the environment and the agent.
    """
    discount_factor = 0.95
    env = examples.simple_reentrant_line_model(job_gen_seed=seed)

    # No overrides: take the hyperparameters exactly as the loader returns them.
    (ac_params, wk_params, si_params, po_params, hh_params, si_class, dp_params,
     name) = load_agents.get_hedgehog_hyperparams()

    hedgehog = BigStepHedgehogAgent(
        env, discount_factor, wk_params, hh_params, ac_params, si_params,
        po_params, si_class, dp_params, name)
    simulator = ps.SncSimulator(env, hedgehog, discount_factor=discount_factor)
    return simulator
 def test_integration_double_reentrant_line_model(self):
     """
     Check that a MaxWeight agent reduces the state of a double re-entrant line model.

     The initial state is 50 in each of its five entries. After 1000 simulation steps, the
     last recorded state must be strictly below the initial state everywhere.
     """
     rng_seed = 42
     np.random.seed(rng_seed)

     num_buffers = 5
     start_state = np.full((num_buffers, 1), 50.0)
     model_kwargs = dict(
         alpha=1,
         mu1=4,
         mu2=3,
         mu3=2,
         mu4=3,
         mu5=4,
         cost_per_buffer=np.array([1, 1, 1, 1, 1])[:, None],
         initial_state=start_state,
         capacity=np.full((num_buffers, 1), np.inf),
         job_conservation_flag=True,
         job_gen_seed=rng_seed,
         max_episode_length=None,
     )
     env = examples.double_reentrant_line_model(**model_kwargs)

     max_weight_agent = mw.MaxWeightAgent(env)
     run_data = ps.SncSimulator(
         env, max_weight_agent, discount_factor=0.95
     ).run(num_simulation_steps=1000)

     final_state = run_data['state'][-1]
     assert np.all(final_state < start_state)
def test_scenario(scenario_name, agent_class):
    """
    Run a brief integration test of a hedgehog agent on a given scenario.

    The test has no assertions on the simulation output: it passes when the agent can be built
    and simulated for SIM_STEPS steps without raising.

    :param scenario_name: Name of the scenario to load. The test is skipped when the name is
        listed in SKIPPED_TESTS, PULL_MODELS or PUSH_PULL_MODELS.
    :param agent_class: Agent class under test; must be BigStepHedgehogAgent or
        BigStepHedgehogGTOAgent, otherwise the test fails.
    """
    skip_tests = SKIPPED_TESTS + PULL_MODELS + PUSH_PULL_MODELS
    if scenario_name in skip_tests:
        pytest.skip(f"Scenario {scenario_name} is excluded from this integration test.")

    np.random.seed(SEED_NO)

    _, env = scenarios.load_scenario(scenario_name, SEED_NO)
    # Update parameters for quick tests.
    overrides = {
        "HedgehogHyperParams": {
            "theta_0": 0.5,
            "horizon_drain_time_ratio": 0.1,
            "horizon_mpc_ratio": 0.1,
            "minimum_horizon": 10
        },
        "AsymptoticCovarianceParams": {
            "num_presimulation_steps": 100,
            "num_batch": 20
        }
    }

    if scenario_name in MIP_REQUIRED_MODELS:
        overrides["HedgehogHyperParams"]["mpc_policy_class_name"] = "FeedbackMipFeasibleMpcPolicy"

    ac_params, wk_params, si_params, po_params, hh_params, si_class, dp_params, name \
        = load_agents.get_hedgehog_hyperparams(**overrides)
    discount_factor = 0.95
    # The two agent classes take different constructor arguments: the GTO variant receives
    # neither si_params nor si_class.
    if agent_class == BigStepHedgehogAgent:
        agent = agent_class(env, discount_factor, wk_params, hh_params, ac_params,
                            si_params, po_params, si_class, dp_params, name)
    elif agent_class == BigStepHedgehogGTOAgent:
        agent = agent_class(env, discount_factor, wk_params, hh_params, ac_params,
                            po_params, dp_params, name)
    else:
        # Fail explicitly rather than with `assert False`, which is stripped under `python -O`
        # and would let execution continue to a NameError on `agent`.
        pytest.fail(f"Not recognised agent: {agent_class}")
    simulator = ps.SncSimulator(env, agent, discount_factor=discount_factor)
    simulator.run(num_simulation_steps=SIM_STEPS)
def run_validation(arguments: argparse.Namespace) -> Dict[str, str]:
    """
    Run the validation on a particular scenario.

    The scenario `arguments.env_name` is loaded once. Each agent named in `arguments.agents` is
    then built, wrapped in a simulator and handed to `run_policy`, with
    `{arguments.logdir}/{name_alias}` as its save location. The environment is reset with the
    job generator seed before each agent.

    Seeds are derived from the arguments: the job generator seed is
    `arguments.env_param_overrides['job_gen_seed']`, while the global NumPy seed, the agent seed
    and the MPC seed are `arguments.seed` plus 100, 200 and 300 respectively.

    Side effect: `arguments.hedgehog_param_overrides` and `arguments.maxweight_param_overrides`
    are replaced by an empty dictionary if they are None and an agent of the matching kind is
    run.

    :param arguments: Namespace of experiment parameters.
    :return: Dictionary mapping each agent's name to the location given to `run_policy` for
        storing its results.
    :raises ValueError: If a 'ppo' agent is requested without any RL agent parameters.
    """
    assert arguments.env_param_overrides['job_gen_seed'] is not None
    assert arguments.seed is not None
    # Note that if job_gen_seed was not in env_param_overrides, then at this point we will have:
    #   arguments.env_param_overrides['job_gen_seed'] == arguments.seed.
    job_gen_seed = arguments.env_param_overrides['job_gen_seed']
    global_seed = arguments.seed + 100
    agent_seed = arguments.seed + 200
    mpc_seed = arguments.seed + 300
    np.random.seed(global_seed)
    print(f"job_gen_seed {job_gen_seed}")
    print(f"global_seed {global_seed}")
    print(f"agent_seed {agent_seed}")
    print(f"mpc_seed {mpc_seed}")
    save_locations = {}

    # Get Scenario
    _, env = scenarios.load_scenario(arguments.env_name, job_gen_seed,
                                     arguments.env_param_overrides)

    # Initialise an agent counter to ensure that the right checkpoint is loaded for each agent.
    rl_agent_count = 0
    for agent_name in arguments.agents:
        env.reset_with_random_state(job_gen_seed)
        agent_args = {}
        name_alias = agent_name  # Set name of folder storing results to agent_name by default.
        if agent_name in load_agents.HEDGEHOG_AGENTS:
            if arguments.hedgehog_param_overrides is None:
                arguments.hedgehog_param_overrides = {}
            agent_args['hh_overrides'] = arguments.hedgehog_param_overrides
            agent_args['discount_factor'] = arguments.discount_factor
            agent_args['debug_info'] = arguments.debug_info
            agent_args['agent_seed'] = agent_seed
            agent_args['mpc_seed'] = mpc_seed
            # Replace directory name if name passed as an agent parameter.
            name_alias = arguments.hedgehog_param_overrides.get(
                'name', agent_name)
        elif agent_name == 'distribution_with_rebalancing_heuristic':
            agent_args['safety_stocks'] = 20 * np.ones(env.state.shape)
        elif agent_name in ['reinforce', 'ppo']:
            agent_args['discount_factor'] = arguments.discount_factor
            if arguments.rl_agent_params:
                # Update agent_args accordingly.
                if rl_agent_count < len(arguments.rl_agent_params):
                    if 'discount_factor' in arguments.rl_agent_params[
                            rl_agent_count]:
                        # Both literals are concatenated; the second one must be an f-string
                        # for the agent name to be interpolated into the message.
                        warn(
                            'WARNING: Overriding provided discount factor with agent specific '
                            f'discount factor for {agent_name} agent')
                # NOTE(review): this update sits outside the bounds check above, so the
                # indexing fails when there are fewer parameter sets than RL agents --
                # confirm whether that is intended.
                agent_args.update(arguments.rl_agent_params[rl_agent_count])
            else:
                if agent_name == "ppo":
                    raise ValueError(
                        "When running a PPO agent you must provide agent parameters."
                    )
                else:
                    warn("REINFORCE agent being run default agent parameters.")
            agent_args['rl_checkpoint'] = arguments.rl_checkpoints[
                rl_agent_count]
            rl_agent_count += 1
        elif agent_name in ('maxweight', 'scheduling_maxweight'):
            if arguments.maxweight_param_overrides is None:
                arguments.maxweight_param_overrides = {}
            agent_args['overrides'] = arguments.maxweight_param_overrides
            agent_args['agent_seed'] = agent_seed
            agent_args['mpc_seed'] = mpc_seed
            # Replace directory name if name passed as an agent parameter.
            name_alias = arguments.maxweight_param_overrides.get(
                'name', agent_name)
        else:
            agent_args['agent_seed'] = agent_seed

        agent = load_agents.get_agent(agent_name, env, **agent_args)
        # Every attribute of the namespace is forwarded to the simulator as a keyword argument,
        # including any override dictionaries initialised above.
        sim = ps.SncSimulator(env, agent, **vars(arguments))

        print(f'\nSimulating {agent.name}...')
        validation_utils.print_agent_params(agent)

        is_hedgehog = isinstance(
            agent, (BigStepHedgehogAgent, PureFeedbackStationaryHedgehogAgent,
                    PureFeedbackMIPHedgehogAgent))
        save_location = f'{arguments.logdir}/{name_alias}'
        run_policy(sim, arguments.num_steps, arguments.server_mode,
                   is_hedgehog, save_location, job_gen_seed)

        if is_hedgehog:
            # Repeats the isinstance check above; presumably kept so that static type checkers
            # narrow the type of `agent` before `workload_tuple` is accessed.
            assert isinstance(
                agent,
                (BigStepHedgehogAgent, PureFeedbackStationaryHedgehogAgent,
                 PureFeedbackMIPHedgehogAgent))
            validation_utils.print_workload_to_physical_resources_indexes(
                agent.workload_tuple.nu)

        save_locations[agent.name] = save_location
        print(f'Data stored at: {save_location}.')
        print(f'Finished simulating {agent.name}.\n')

    print(f"job_gen_seed: {arguments.env_param_overrides.get('job_gen_seed')}")
    print("End of simulation!")
    if not arguments.server_mode:
        # Outside server mode, turn interactive mode off and show the figures.
        plt.ioff()
        plt.show()
    return save_locations