def test_get_hedgehog_hyperparams_overwrite_multiple():
    overrides = {
        'BigStepPenaltyPolicyParams': {
            'boolean_action_flag': False,
            'convex_solver': 'fake_solver',
            'safety_penalty_coeff': 21,
            'nonidling_penalty_coeff': 12
        },
        'HedgehogHyperParams': {
            'activity_rates_policy_class_name': 'BigStepPenaltyPolicy',
            'horizon_drain_time_ratio': 0.99,
            'horizon_mpc_ratio': 0.55,
            'minimum_horizon': 0.77,
            'mpc_policy_class_name': 'fake_mpc_class',
            'theta_0': 99
        },
        'name': 'funny_name'
    }
    updated_params = load_agents.get_hedgehog_hyperparams(**overrides)
    po_params = BigStepPenaltyPolicyParams(**overrides['BigStepPenaltyPolicyParams'])
    hh_params = HedgehogHyperParams(**overrides['HedgehogHyperParams'])
    name = overrides['name']
    ac_params, wk_params, si_params, _, _, si_class, dp_params, _ \
        = get_hedgehog_default_values('BigStepPenaltyPolicy')
    assert_equal_hedgehog_parameters(updated_params, ac_params, wk_params, si_params,
                                     po_params, hh_params, si_class, dp_params, name)

def test_get_policy_examples_two_alternative_methods_multiple_horizons(ex_env, horizon):
    seed = 42
    np.random.seed(seed)
    env = eval('examples.' + ex_env)(job_gen_seed=seed)
    kappa_w = 1e2
    kappa = 1e2
    safety_stocks_vec = 2 * np.ones((env.num_resources, 1))
    policy_params = BigStepPenaltyPolicyParams('cvx.CPLEX', False, kappa_w, kappa)
    load, workload_mat, nu = workload.compute_load_workload_matrix(env)
    num_wl_vec = workload_mat.shape[0]
    k_idling_set = np.random.randint(0, workload_mat.shape[0], 1)
    draining_bottlenecks = set(np.random.randint(0, workload_mat.shape[0], 1))
    state = 20 * np.ones((env.num_buffers, 1))
    policy_cvx = BigStepPolicy(
        env.cost_per_buffer, env.constituency_matrix, env.job_generator.demand_rate,
        env.job_generator.buffer_processing_matrix, workload_mat, nu,
        env.list_boundary_constraint_matrices, env.ind_surplus_buffers, policy_params)
    kwargs = {
        'safety_stocks_vec': safety_stocks_vec,
        'k_idling_set': k_idling_set,
        'draining_bottlenecks': draining_bottlenecks,
        'horizon': horizon
    }
    z_star_cvx, opt_val_cvx = policy_cvx.get_policy(state, **kwargs)
    allowed_activities = get_allowed_activities(policy_cvx, num_wl_vec, k_idling_set)
    z_star_scipy, opt_val_scipy = policy_utils.feedback_policy_nonidling_penalty_scipy(
        state, env.cost_per_buffer, env.constituency_matrix,
        env.job_generator.buffer_processing_matrix, workload_mat, safety_stocks_vec,
        k_idling_set, draining_bottlenecks, kappa_w,
        env.list_boundary_constraint_matrices, allowed_activities,
        method='revised simplex', demand_rate=env.job_generator.demand_rate,
        horizon=horizon)
    # SciPy has safety stock constraints, while CVX has a safety stock penalty, so the
    # LP objectives are not comparable. Compare the instantaneous cost of the two
    # solutions instead.
    opt_val_cvx = (env.cost_per_buffer.T
                   @ env.job_generator.buffer_processing_matrix @ z_star_cvx)[0]
    opt_val_scipy = (env.cost_per_buffer.T
                     @ env.job_generator.buffer_processing_matrix
                     @ z_star_scipy[:, None])[0]
    np.testing.assert_allclose(opt_val_cvx, opt_val_scipy, rtol=5e-2)

def test_get_policy_negative_kappa_w_kappa_horizon(some_vars_fixture):
    kappa_w, kappa, horizon = some_vars_fixture
    num_buffers = 4
    num_activities = 5
    num_resources = 5
    num_wl_vec = 4
    constituency_matrix = np.eye(num_resources)
    list_boundary_constraint_matrices = [np.array([[1, 0]]), np.array([[0, 1]])]
    ind_surplus_buffers = None
    cost_per_buffer = np.zeros((num_buffers, 1))
    demand_rate = np.zeros((num_buffers, 1))
    buffer_processing_matrix = np.zeros((num_buffers, num_activities))
    workload_mat = np.zeros((num_wl_vec, num_buffers))
    nu = np.zeros(num_resources)
    boolean_action_flag = False
    convex_solver = 'cvx.CPLEX'
    policy_params = BigStepPenaltyPolicyParams(convex_solver, boolean_action_flag,
                                               kappa_w, kappa)
    with pytest.raises(AssertionError):
        _ = BigStepPolicy(cost_per_buffer, constituency_matrix, demand_rate,
                          buffer_processing_matrix, workload_mat, nu,
                          list_boundary_constraint_matrices, ind_surplus_buffers,
                          policy_params)

def get_policy_params(policy_class):
    # `policy_class` is a class, not an instance, so dispatch with issubclass.
    if issubclass(policy_class, (BigStepLayeredPolicy, BigStepSurplusLayeredPolicy)):
        return BigStepLayeredPolicyParams(convex_solver='cvx.CPLEX')
    else:
        return BigStepPenaltyPolicyParams(
            convex_solver='cvx.CPLEX',
            boolean_action_flag=False,
            nonidling_penalty_coeff=1e2,
            safety_penalty_coeff=1e2
        )

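# Usage sketch (illustrative, not part of the original suite): the helper dispatches
# on the policy class, returning the layered params for layered policies and the
# penalty params for everything else (e.g. BigStepPolicy).
def test_get_policy_params_dispatch_sketch():
    assert isinstance(get_policy_params(BigStepLayeredPolicy), BigStepLayeredPolicyParams)
    assert isinstance(get_policy_params(BigStepPolicy), BigStepPenaltyPolicyParams)
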
def get_penalty_policy_object_for_simple_link_constrained_model(env, kappa_w, kappa):
    _, workload_mat, nu = workload.compute_load_workload_matrix(env, 6)
    policy_params = BigStepPenaltyPolicyParams('cvx.CPLEX', False, kappa_w, kappa)
    # Pass the workload matrix and nu computed above, not attributes of env.
    policy_obj = BigStepPolicy(
        env.cost_per_buffer, env.constituency_matrix, env.job_generator.demand_rate,
        env.job_generator.buffer_processing_matrix, workload_mat, nu,
        env.list_boundary_constraint_matrices, env.ind_surplus_buffers, policy_params)
    return policy_obj

def __init__(
        self,
        env: ControlledRandomWalk,
        discount_factor: float,
        workload_relaxation_params: WorkloadRelaxationParams,
        hedgehog_hyperparams: HedgehogHyperParams,
        asymptotic_covariance_params: AsymptoticCovarianceParams,
        strategic_idling_params: StrategicIdlingParams = StrategicIdlingParams(
            'StrategicIdlingHedging', 'cvx.CPLEX', 0.01, 0.01, 1, 1e-5),
        policy_params: BigStepPenaltyPolicyParams = BigStepPenaltyPolicyParams(
            'cvx.CPLEX', False, 100, 1000),
        strategic_idling_class: Type[StrategicIdlingCore] = StrategicIdlingHedging,
        demand_planning_params: DemandPlanningParams = DemandPlanningParams(),
        name: str = "BigStepActivityRatesHedgehogAgent",
        debug_info: bool = False,
        agent_seed: Optional[int] = None,
        mpc_seed: Optional[int] = None) -> None:
    """
    :param env: Environment to be stepped through.
    :param discount_factor: Discount factor for the cost per time step.
    :param workload_relaxation_params: Tuple of parameters defining the first workload
        relaxation.
    :param hedgehog_hyperparams: Tuple of parameters defining penalties for safety
        stocks.
    :param asymptotic_covariance_params: Tuple of parameters used to generate an
        estimate of the asymptotic covariance matrix.
    :param strategic_idling_params: Tuple of parameters that specify the solver to be
        used at the different steps when finding potential idling directions, as well
        as some tolerance parameters.
    :param policy_params: Tuple of parameters that specify the solver to be used by
        the policy.
    :param strategic_idling_class: Class to be used to make strategic idling decisions.
        Pure feedback Hedgehog only accepts StrategicIdlingCore or
        StrategicIdlingHedging.
    :param demand_planning_params: Tuple of parameters that specify the demand planning
        class and its parameters.
    :param name: Agent identifier.
    :param debug_info: Flag to print instantaneous calculations useful for debugging.
    :param agent_seed: Random seed for agent's random number generator.
    :param mpc_seed: Random seed for MPC policy's random number generator.
    """
    self.validate_params(hedgehog_hyperparams, policy_params, strategic_idling_class)
    super().__init__(env, discount_factor, workload_relaxation_params,
                     hedgehog_hyperparams, asymptotic_covariance_params,
                     strategic_idling_params, policy_params, strategic_idling_class,
                     demand_planning_params, name, debug_info, agent_seed, mpc_seed)

def test_get_policy_non_empty_buffers():
    """
    We have a serial line of two resources with one buffer each:
        demand --> q_1 + resource_1 --> q_2 + resource_2 -->
    We relax the nonidling constraint for the first resource. Since the cost at the
    second resource is much higher, the LP will make the first resource idle.
    """
    k_idling_set = np.array([0])
    draining_bottlenecks = {}
    kappa_w = 1e2  # Very high to force the influence of the constraint.
    kappa = 1e2
    policy_params = BigStepPenaltyPolicyParams('cvx.CPLEX', False, kappa_w, kappa)
    state = np.array([[10], [10]])
    c_1 = 1
    c_2 = 100
    mu_1 = 100
    mu_2 = 1
    alpha = 0.9
    cost_per_buffer = np.array([[c_1], [c_2]])
    demand_rate = np.array([[alpha], [0]])
    buffer_processing_matrix = np.array([[-mu_1, 0], [mu_1, -mu_2]])
    constituency_matrix = np.eye(2)
    safety_stocks_vec = np.zeros((2, 1))
    list_boundary_constraint_matrices = []
    ind_surplus_buffers = None
    workload_mat = np.array([[1 / mu_1, 0], [1 / mu_2, 1 / mu_2]])
    nu = np.eye(2)
    horizon = 100
    policy = BigStepPolicy(cost_per_buffer, constituency_matrix, demand_rate,
                           buffer_processing_matrix, workload_mat, nu,
                           list_boundary_constraint_matrices, ind_surplus_buffers,
                           policy_params)
    kwargs = {
        'safety_stocks_vec': safety_stocks_vec,
        'k_idling_set': k_idling_set,
        'draining_bottlenecks': draining_bottlenecks,
        'horizon': horizon
    }
    z_star, _ = policy.get_policy(state, **kwargs)
    np.testing.assert_allclose(z_star, np.array([[0], [1]]), rtol=1e-2, atol=1e-2)

def test_big_step_policy_k_idling_set_relax_2nd_resource():
    """
    We have a serial line of two resources with one buffer each:
        demand --> q_1 + resource_1 --> q_2 + resource_2 -->
    We relax the nonidling constraint for the second resource. So even though the cost
    at the second resource is much higher than at the first, the LP will make the
    first resource work.
    """
    k_idling_set = np.array([])
    draining_bottlenecks = {1}
    kappa_w = 1e4
    kappa = 1e2
    state = np.array([[10], [10]])
    c_1 = 1
    c_2 = 100
    mu_1 = 100
    mu_2 = 1
    cost_per_buffer = np.array([[c_1], [c_2]])
    constituency_matrix = np.eye(2)
    list_boundary_constraint_matrices = [np.array([[1, 0]]), np.array([[0, 1]])]
    ind_surplus_buffers = None
    demand_rate = np.ones((2, 1))
    buffer_processing_matrix = np.array([[-mu_1, 0], [mu_1, -mu_2]])
    workload_mat = np.array([[1 / mu_1, 0], [1 / mu_2, 1 / mu_2]])
    nu = np.eye(2)
    safety_stocks_vec = np.zeros((2, 1))
    horizon = 1
    policy_params = BigStepPenaltyPolicyParams('cvx.CPLEX', False, kappa_w, kappa)
    policy = BigStepPolicy(cost_per_buffer, constituency_matrix, demand_rate,
                           buffer_processing_matrix, workload_mat, nu,
                           list_boundary_constraint_matrices, ind_surplus_buffers,
                           policy_params)
    kwargs = {
        'safety_stocks_vec': safety_stocks_vec,
        'k_idling_set': k_idling_set,
        'draining_bottlenecks': draining_bottlenecks,
        'horizon': horizon
    }
    z_star_pen, _ = policy.get_policy(state, **kwargs)
    with pytest.raises(AssertionError):
        np.testing.assert_almost_equal(z_star_pen[0], 0)
    np.testing.assert_almost_equal(z_star_pen[1], 1, decimal=5)

def test_cum_cost_computation_switching_curve_regime_homogenous_cost():
    neg_log_discount_factor = -np.log(0.99999)
    env = examples.simple_reentrant_line_model(
        alpha1=0.33, mu1=0.7, mu2=0.34, mu3=0.7,
        cost_per_buffer=np.array([1, 1.001, 1])[:, None])
    num_wl_vec = 2
    load, workload_mat, nu = wl.compute_load_workload_matrix(env, num_wl_vec)
    strategic_idling_params = StrategicIdlingParams()
    workload_cov = np.array([[2, 0.5], [0.5, 3]])
    kappa_w = np.sum(env.cost_per_buffer) * 10
    kappa = 1e4
    policy_params = BigStepPenaltyPolicyParams('cvx.CPLEX', False, kappa_w, kappa)
    policy_object = BigStepPolicy(
        env.cost_per_buffer, env.constituency_matrix, env.job_generator.demand_rate,
        env.job_generator.buffer_processing_matrix, workload_mat, nu,
        env.list_boundary_constraint_matrices, env.ind_surplus_buffers, policy_params)
    si_object = StrategicIdlingForesight(
        workload_mat=workload_mat,
        neg_log_discount_factor=neg_log_discount_factor,
        load=load,
        cost_per_buffer=env.cost_per_buffer,
        model_type=env.model_type,
        policy_object=policy_object,
        strategic_idling_params=strategic_idling_params,
        workload_cov=workload_cov)
    init_state = np.array([0, 0, 10000])[:, np.newaxis]
    init_w = workload_mat @ init_state
    si_object._current_state = init_state
    cw_vars = si_object._non_negative_workloads(init_w)
    cw_vars = super(StrategicIdlingForesight,
                    si_object)._handle_switching_curve_regime(init_w, cw_vars)
    gto_cost = si_object._roll_out_gto_fluid_policy(cw_vars)
    std_hedging_cost = si_object._roll_out_hedgehog_fluid_policy(cw_vars)
    np.testing.assert_allclose(std_hedging_cost / 1e6, 1214, rtol=0.03)
    np.testing.assert_allclose(gto_cost / 1e6, 950, rtol=0.03)
    si_object.get_allowed_idling_directions(init_state)
    assert si_object._current_regime == "gto"
    assert si_object._original_target_dyn_bot_set == {0, 1}

def test_assertion_large_horizon_with_boolean_action_constraints():
    horizon = 2
    boolean_action_flag = True
    policy_params = BigStepPenaltyPolicyParams('cvx.CPLEX', boolean_action_flag,
                                               1e2, 1e2)
    policy = BigStepPolicy(
        np.ones((2, 1)), np.eye(2), np.ones((2, 1)), -1.1 * np.eye(2), np.eye(2),
        np.eye(2), [np.array([[1, 0]]), np.array([[0, 1]])], None, policy_params)
    kwargs = {
        'safety_stocks_vec': np.zeros((2, 1)),
        'k_idling_set': np.array([]),
        'draining_bottlenecks': {},
        'horizon': horizon
    }
    with pytest.raises(AssertionError):
        policy.get_policy(np.zeros((2, 1)), **kwargs)

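# Complementary sketch (assumed behavior, mirroring the assertion test above and the
# boolean-action test with horizon=1 further below): with a one-step horizon, boolean
# action constraints are accepted and get_policy returns a solution of the usual shape.
def test_no_assertion_unit_horizon_with_boolean_action_constraints():
    policy_params = BigStepPenaltyPolicyParams('cvx.CPLEX', True, 1e2, 1e2)
    policy = BigStepPolicy(
        np.ones((2, 1)), np.eye(2), np.ones((2, 1)), -1.1 * np.eye(2), np.eye(2),
        np.eye(2), [np.array([[1, 0]]), np.array([[0, 1]])], None, policy_params)
    kwargs = {
        'safety_stocks_vec': np.zeros((2, 1)),
        'k_idling_set': np.array([]),
        'draining_bottlenecks': {},
        'horizon': 1
    }
    z_star, _ = policy.get_policy(np.zeros((2, 1)), **kwargs)
    assert z_star.shape == (2, 1)
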
def test_get_hedgehog_hyperparams_overwrite_big_step_penalty_policy():
    overrides = {
        'BigStepPenaltyPolicyParams': {
            'convex_solver': 'fake_solver',
            'boolean_action_flag': True,
            'safety_penalty_coeff': 33,
            'nonidling_penalty_coeff': 55
        },
        'HedgehogHyperParams': {
            'activity_rates_policy_class_name': 'BigStepPenaltyPolicy'
        }
    }
    updated_params = load_agents.get_hedgehog_hyperparams(**overrides)
    po_params = BigStepPenaltyPolicyParams(**overrides['BigStepPenaltyPolicyParams'])
    ac_params, wk_params, si_params, _, hh_params, si_class, dp_params, name \
        = get_hedgehog_default_values('BigStepPenaltyPolicy')
    assert_equal_hedgehog_parameters(updated_params, ac_params, wk_params, si_params,
                                     po_params, hh_params, si_class, dp_params, name)

def test_big_step_policy_safety_stock_active():
    """
    Resource 2 is below its safety stock threshold and the nonidling penalty is zero.
    Minimum cost is achieved by draining buffer 3. However, resource 1 will also drain
    buffer 1, so that buffer 2 reaches its safety stock level.
    """
    state = np.array([0, 0, 100])[:, None]
    alpha1 = 8
    mu1 = 20
    mu2 = 10
    mu3 = 20
    safety_stocks_vec = 8 * np.ones((2, 1))
    horizon = 22
    cost_per_buffer = np.array([[1.5], [1], [2]])
    z_star_true = np.array([0.4, 0, 0.6])[:, None]
    env = examples.simple_reentrant_line_model(alpha1, mu1, mu2, mu3, cost_per_buffer)
    load, workload_mat, nu = workload.compute_load_workload_matrix(env)
    kappa_w = 0
    kappa = 1e2
    k_idling_set = np.array([])
    draining_bottlenecks = {}
    boolean_action_flag = False
    policy_params = BigStepPenaltyPolicyParams('cvx.CPLEX', boolean_action_flag,
                                               kappa_w, kappa)
    policy_cvx = BigStepPolicy(
        env.cost_per_buffer, env.constituency_matrix, env.job_generator.demand_rate,
        env.job_generator.buffer_processing_matrix, workload_mat, nu,
        env.list_boundary_constraint_matrices, env.ind_surplus_buffers, policy_params)
    kwargs = {
        'safety_stocks_vec': safety_stocks_vec,
        'k_idling_set': k_idling_set,
        'draining_bottlenecks': draining_bottlenecks,
        'horizon': horizon
    }
    z_star, _ = policy_cvx.get_policy(state, **kwargs)
    np.testing.assert_allclose(z_star, z_star_true, rtol=1e-4)

def perform_boolean_constraints_simple_reentrant_line_test(state, z_star_true):
    """
    Set parameters such that the objective is:
        f(zeta) = -0.5 * mu1 * zeta_1 + 1 * mu2 * zeta_2 - 2 * mu3 * zeta_3.
    The state dynamics constraint is:
        x_1 - mu1 * zeta_1 >= 0
        x_2 + mu1 * zeta_1 - mu2 * zeta_2 >= 0
        x_3 + mu2 * zeta_2 - mu3 * zeta_3 >= 0.
    The LP prioritises working on buffer 3 as much as possible. The only motivation to
    work on buffers 1 and 2 is to satisfy the state dynamics constraint.
    """
    horizon = 1
    boolean_action_flag = True
    cost_per_buffer = np.array([[1.5], [1], [2]])
    env = examples.simple_reentrant_line_model(cost_per_buffer=cost_per_buffer)
    load, workload_mat, nu = workload.compute_load_workload_matrix(env)
    safety_stocks_vec = np.zeros((env.num_resources, 1))
    kappa_w = 0
    kappa = 0
    k_idling_set = np.array([])
    draining_bottlenecks = {}
    policy_params = BigStepPenaltyPolicyParams('cvx.CPLEX', boolean_action_flag,
                                               kappa_w, kappa)
    policy_cvx = BigStepPolicy(
        env.cost_per_buffer, env.constituency_matrix, env.job_generator.demand_rate,
        env.job_generator.buffer_processing_matrix, workload_mat, nu,
        env.list_boundary_constraint_matrices, env.ind_surplus_buffers, policy_params)
    kwargs = {
        'safety_stocks_vec': safety_stocks_vec,
        'k_idling_set': k_idling_set,
        'draining_bottlenecks': draining_bottlenecks,
        'horizon': horizon
    }
    z_star, _ = policy_cvx.get_policy(state, **kwargs)
    assert snc_tools.is_approx_binary(z_star)
    np.testing.assert_almost_equal(z_star, z_star_true)

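# Usage sketch (hypothetical state/solution pair, assuming the default rates of
# simple_reentrant_line_model with mu2 < mu3): with only buffer 1 non-empty, the
# dynamics constraint makes zeta_3 = 1 infeasible and zeta_2 = 1 only adds cost, so
# working on buffer 1 is the single cost-reducing boolean action.
def test_boolean_constraints_simple_reentrant_line_first_buffer_sketch():
    state = np.array([[10], [0], [0]])
    z_star_true = np.array([[1], [0], [0]])
    perform_boolean_constraints_simple_reentrant_line_test(state, z_star_true)
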
def get_hedgehog_hyperparams(**overrides) -> Tuple:
    """
    Get parameters for Hedgehog agent. If some parameter is not specified in overrides,
    then it takes a default value.

    :param overrides: Parameters coming from JSON file.
    :return: (ac_params, wk_params, si_params, po_params, hh_params, si_class,
        dp_params, name):
        - ac_params: Hyperparameters to estimate the workload asymptotic covariance
            matrix.
        - wk_params: Hyperparameters to compute the workload matrix.
        - si_params: Hyperparameters for computing the strategic possible idling
            directions.
        - po_params: Hyperparameters for computing the activity rates policy.
        - hh_params: Hyperparameters for the main loop of the Hedgehog algorithm.
        - si_class: Strategic idling class.
        - dp_params: Demand planning parameters.
        - name: Agent name.
    """
    ac_params = get_hedgehog_params_as_named_tuple(AsymptoticCovarianceParams(),
                                                   **overrides)
    wk_params = get_hedgehog_params_as_named_tuple(WorkloadRelaxationParams(),
                                                   **overrides)
    si_params = get_hedgehog_params_as_named_tuple(StrategicIdlingParams(), **overrides)
    # TODO: Get si_class from class name inside HedgehogAgentInterface.
    si_class = get_strategic_idling_class(
        cast(StrategicIdlingParams, si_params).strategic_idling_class)
    hh_params = get_hedgehog_params_as_named_tuple(HedgehogHyperParams(), **overrides)
    ar_policy_class_name = cast(HedgehogHyperParams,
                                hh_params).activity_rates_policy_class_name
    if ar_policy_class_name == 'BigStepLayeredPolicy':
        po_params = get_hedgehog_params_as_named_tuple(BigStepLayeredPolicyParams(),
                                                       **overrides)
    else:
        po_params = get_hedgehog_params_as_named_tuple(BigStepPenaltyPolicyParams(),
                                                       **overrides)
    dp_params = get_hedgehog_params_as_named_tuple(DemandPlanningParams(), **overrides)
    name = overrides.get('name', "BigStepHedgehogAgent")
    return ac_params, wk_params, si_params, po_params, hh_params, si_class, dp_params, \
        name

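# Usage sketch (not from the original suite): overriding a single field leaves all
# other hyperparameters, including the agent name, at their defaults.
def test_get_hedgehog_hyperparams_single_override_sketch():
    overrides = {'HedgehogHyperParams': {'minimum_horizon': 50}}
    _, _, _, _, hh_params, _, _, name = load_agents.get_hedgehog_hyperparams(**overrides)
    assert hh_params.minimum_horizon == 50
    assert name == "BigStepHedgehogAgent"
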
def get_hedgehog_default_values(activity_rates_policy_class_name="BigStepLayeredPolicy"):
    ac_params = AsymptoticCovarianceParams(num_presimulation_steps=10000, num_batch=200)
    wk_params = WorkloadRelaxationParams(num_vectors=None, load_threshold=None,
                                         convex_solver='cvx.CPLEX')
    si_params = StrategicIdlingParams(
        strategic_idling_class='StrategicIdlingForesight',
        convex_solver='cvx.CPLEX',
        epsilon=0.05,
        shift_eps=1e-2,
        hedging_scaling_factor=1,
        penalty_coeff_w_star=1e-5)
    si_class = load_agents.get_strategic_idling_class(
        cast(StrategicIdlingParams, si_params).strategic_idling_class)
    if activity_rates_policy_class_name == "BigStepLayeredPolicy":
        po_params = BigStepLayeredPolicyParams(convex_solver='cvx.CPLEX')
    elif activity_rates_policy_class_name == "BigStepPenaltyPolicy":
        po_params = BigStepPenaltyPolicyParams(
            convex_solver='cvx.CPLEX',
            boolean_action_flag=False,
            safety_penalty_coeff=10,
            nonidling_penalty_coeff=1000)
    else:
        raise ValueError("Unknown activity rates policy class name: "
                         f"{activity_rates_policy_class_name}.")
    hh_params = HedgehogHyperParams(
        activity_rates_policy_class_name=activity_rates_policy_class_name,
        horizon_drain_time_ratio=0,
        horizon_mpc_ratio=1,
        minimum_horizon=100,
        mpc_policy_class_name="FeedbackStationaryFeasibleMpcPolicy",
        theta_0=0.5)
    dp_params = DemandPlanningParams(demand_planning_class_name=None, params_dict=dict())
    name = "BigStepHedgehogAgent"
    return ac_params, wk_params, si_params, po_params, hh_params, si_class, dp_params, \
        name

def test_get_policy_with_two_alternative_methods():
    np.random.seed(42)
    num_buffers = 4
    num_activities = 5
    num_wl_vec = 4
    constituency_matrix = np.array([[1, 1, 0, 0, 0],
                                    [0, 0, 1, 1, 0],
                                    [0, 0, 0, 1, 1]])
    list_boundary_constraint_matrices = [
        np.array([[1, 1, 0, 0]]),
        np.array([[0, 1, 1, 0]]),
        np.array([[0, 0, 1, 1]])
    ]
    ind_surplus_buffers = None
    num_resources = constituency_matrix.shape[0]
    k_idling_set = np.array([0])
    draining_bottlenecks = {1}
    nu = np.array([[0.5, 0, 0.5],
                   [0, 1, 0],
                   [0, 0, 1],
                   [0, 0.5, 0.5]])
    kappa_w = 1e2
    kappa = 1e2
    policy_params = BigStepPenaltyPolicyParams('cvx.CPLEX', False, kappa_w, kappa)
    for _ in range(100):
        state = 10 * np.random.random_sample((num_buffers, 1))
        cost_per_buffer = np.random.random_sample((num_buffers, 1))
        demand_rate = 10 * np.random.random_sample((num_buffers, 1))
        safety_stocks_vec = 2 * np.random.random_sample((num_resources, 1))
        buffer_processing_matrix = 20 * (np.random.random_sample(
            (num_buffers, num_activities)) - .5)
        workload_mat = np.random.random_sample((num_wl_vec, num_buffers))
        horizon = 100
        policy_cvx = BigStepPolicy(cost_per_buffer, constituency_matrix, demand_rate,
                                   buffer_processing_matrix, workload_mat, nu,
                                   list_boundary_constraint_matrices,
                                   ind_surplus_buffers, policy_params)
        kwargs = {
            'safety_stocks_vec': safety_stocks_vec,
            'k_idling_set': k_idling_set,
            'draining_bottlenecks': draining_bottlenecks,
            'horizon': horizon
        }
        z_star_pen_cvx, opt_val_pen_cvx = policy_cvx.get_policy(state, **kwargs)
        allowed_activities = get_allowed_activities(policy_cvx, num_wl_vec,
                                                    k_idling_set)
        z_star_pen_scipy, opt_val_pen_scipy = \
            policy_utils.feedback_policy_nonidling_penalty_scipy(
                state, cost_per_buffer, constituency_matrix, buffer_processing_matrix,
                workload_mat, safety_stocks_vec, k_idling_set, draining_bottlenecks,
                kappa_w, list_boundary_constraint_matrices, allowed_activities,
                method='revised simplex', demand_rate=demand_rate, horizon=horizon)
        cvx_cost = get_cost(cost_per_buffer, buffer_processing_matrix, z_star_pen_cvx)
        scipy_cost = get_cost(cost_per_buffer, buffer_processing_matrix,
                              z_star_pen_scipy)
        np.testing.assert_almost_equal(np.squeeze(cvx_cost), scipy_cost, decimal=3)

def test_feedback_policy_nonidling_constraint_two_alternative_methods():
    np.random.seed(42)
    num_buffers = 4
    num_activities = 5
    num_wl_vec = 4
    constituency_matrix = np.array([[1, 1, 0, 0, 0],
                                    [0, 0, 1, 1, 0],
                                    [0, 0, 0, 1, 1]])
    nu = np.array([[0.5, 0, 0.5],
                   [0, 1, 0],
                   [0, 0, 1],
                   [0, 0.5, 0.5]])
    list_boundary_constraint_matrices = [
        np.array([[1, 0, 0, 1]]),
        np.array([[0, 1, 1, 0]])
    ]
    ind_surplus_buffers = None
    kappa_w = 1e2
    kappa = 1e2
    policy_params = BigStepPenaltyPolicyParams('cvx.CPLEX', False, kappa_w, kappa)
    horizon = 100
    k_idling_set = np.array(range(num_buffers))
    draining_bottlenecks = {0}
    num_problems = 100
    i = 0  # Number of feasible problems.
    diff_opt_val = 0
    diff_z_star = 0
    num_resources = constituency_matrix.shape[0]
    for _ in range(num_problems):
        state = 10 * np.random.random_sample((num_buffers, 1))
        cost_per_buffer = np.random.random_sample((num_buffers, 1)) + 0.1
        safety_stocks_vec = 0 * np.random.random_sample((num_resources, 1))
        buffer_processing_matrix = 20 * (np.random.random_sample(
            (num_buffers, num_activities)) - 1)
        workload_mat = 0.5 * np.random.random_sample((num_wl_vec, num_buffers))
        demand_rate = np.random.random_sample((num_buffers, 1))
        policy_cvx = BigStepPolicy(cost_per_buffer, constituency_matrix, demand_rate,
                                   buffer_processing_matrix, workload_mat, nu,
                                   list_boundary_constraint_matrices,
                                   ind_surplus_buffers, policy_params)
        allowed_activities = get_allowed_activities(policy_cvx, num_wl_vec,
                                                    k_idling_set)
        z_star_scipy, opt_val_scipy = \
            policy_utils.feedback_policy_nonidling_constraint_scipy(
                state, cost_per_buffer, constituency_matrix, buffer_processing_matrix,
                workload_mat, safety_stocks_vec, k_idling_set, draining_bottlenecks,
                list_boundary_constraint_matrices, allowed_activities, demand_rate,
                horizon)
        z_star_cvx, opt_val_cvx = \
            policy_utils.feedback_policy_nonidling_constraint_cvx(
                state, cost_per_buffer, constituency_matrix, buffer_processing_matrix,
                workload_mat, safety_stocks_vec, k_idling_set, draining_bottlenecks,
                list_boundary_constraint_matrices, allowed_activities, demand_rate,
                horizon)
        if z_star_scipy is not None and z_star_cvx is not None:
            z_star_scipy = z_star_scipy[:, None]
            diff_opt_val_i = np.abs((opt_val_cvx - opt_val_scipy) / opt_val_cvx)
            diff_z_star_i = np.linalg.norm(z_star_cvx - z_star_scipy)
            print(i, "diff opt val=", diff_opt_val_i)
            diff_opt_val += diff_opt_val_i
            diff_z_star += diff_z_star_i
            i += 1
            # np.testing.assert_allclose(z_star_scipy, z_star_cvx, rtol=1e-2)
            # np.testing.assert_allclose(opt_val_scipy, opt_val_cvx, rtol=1e-2)
    rel_diff_opt_val = diff_opt_val / i
    rel_diff_z_star = diff_z_star / i
    np.testing.assert_almost_equal(rel_diff_opt_val, 0, decimal=2)
    np.testing.assert_almost_equal(rel_diff_z_star, 0, decimal=2)
    print("======================================")
    print("relative error opt_val=", rel_diff_opt_val)
    print("absolute error z_star=", rel_diff_z_star, "\n")
    print("Num feasible problems: ", i)