Ejemplos de assign_env_config en Python, ejemplos de or_gym.utils.assign_env_config en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: newsvendor.py Proyecto: toshakyamanoj/or-gym

    def __init__(self, *args, **kwargs):
        """Set default parameters, apply the config and build the spaces.

        Positional arguments are accepted but not used in this method.
        ``kwargs`` is forwarded to ``assign_env_config`` (defined elsewhere;
        presumably it overrides the defaults assigned below -- confirm).
        """
        # --- defaults -----------------------------------------------------
        self.lead_time = 5
        self.max_inventory = 4000
        self.max_order_quantity = 2000
        self.step_limit = 40
        self.p_max = 100  # Max sale price
        self.h_max = 5  # Max holding cost
        self.k_max = 10  # Max lost sales penalty
        self.mu_max = 200  # Max mean of the demand distribution
        self.gamma = 1  # Discount factor
        assign_env_config(self, kwargs)

        # --- observation space ---------------------------------------------
        # Five scalar entries followed by one entry per lead-time period.
        self.obs_dim = self.lead_time + 5
        obs_floor = np.zeros(self.obs_dim)
        scalar_caps = [
            self.p_max, self.p_max, self.h_max, self.k_max, self.mu_max
        ]
        pipeline_caps = [self.max_order_quantity] * self.lead_time
        obs_ceiling = np.array(scalar_caps + pipeline_caps)
        self.observation_space = spaces.Box(low=obs_floor,
                                            high=obs_ceiling,
                                            dtype=np.float32)

        # --- action space: a single order quantity ---------------------------
        order_floor = np.zeros(1)
        order_ceiling = np.array([self.max_order_quantity])
        self.action_space = spaces.Box(low=order_floor,
                                       high=order_ceiling,
                                       dtype=np.float32)

        self.reset()

Ejemplo n.º 2

0

Mostrar archivo

Archivo: knapsack.py Proyecto: grossmann-group/or-gym

    def __init__(self, *args, **kwargs):
        """Draw per-item limits, run the parent initialiser, build the space.

        Positional arguments are accepted but not used in this method.
        ``kwargs`` is forwarded to ``assign_env_config`` (defined elsewhere).
        ``self.max_weight`` and ``self.mask`` are read below but not assigned
        here -- presumably set by the parent initialiser or the config.
        """
        self.N = 200
        # Item limits are drawn before the parent initialiser runs; the
        # working copy is kept separate from the initial draw.
        self.item_limits_init = np.random.randint(1, 10, size=self.N)
        self.item_limits = self.item_limits_init.copy()
        super().__init__()
        # Weights and values are (re)drawn after the parent initialiser.
        self.item_weights = np.random.randint(1, 100, size=self.N)
        self.item_values = np.random.randint(0, 100, size=self.N)

        assign_env_config(self, kwargs)

        state_box = spaces.Box(0,
                               self.max_weight,
                               shape=(3, self.N + 1),
                               dtype=np.int32)
        if not self.mask:
            # Unmasked: the observation is the bare state box.
            self.observation_space = state_box
            return

        # Masked: wrap the state together with two per-item 0/1 boxes.
        self.observation_space = spaces.Dict({
            "action_mask":
            spaces.Box(0, 1, shape=(len(self.item_limits), )),
            "avail_actions":
            spaces.Box(0, 1, shape=(len(self.item_limits), )),
            "state":
            state_box,
        })

Ejemplo n.º 3

0

Mostrar archivo

    def __init__(self, *args, **kwargs):
        """Assign defaults, apply the config and create the spaces.

        Positional arguments are accepted but not used in this method.
        ``kwargs`` is forwarded to ``assign_env_config`` (defined elsewhere).
        """
        # Capacities and numerical tolerance.
        self.cpu_capacity = 1
        self.mem_capacity = 1
        self.t_interval = 20
        self.tol = 1e-5
        # 60 * 24 divided by the interval length, truncated to an int.
        self.step_limit = int(60 * 24 / self.t_interval)
        self.n_pms = 50
        self.load_idx = np.array([1, 2])
        # NOTE(review): assigns an int to ``seed`` on the instance; if the
        # class also defines a ``seed`` method this would shadow it -- confirm.
        self.seed = 0
        self.mask = True
        assign_env_config(self, kwargs)
        self.action_space = spaces.Discrete(self.n_pms)

        pm_count = self.n_pms
        if not self.mask:
            self.observation_space = spaces.Box(0,
                                                1,
                                                shape=(pm_count + 1, 3))
        else:
            per_pm_shape = (pm_count, )
            self.observation_space = spaces.Dict({
                "action_mask":
                spaces.Box(0, 1, shape=per_pm_shape),
                "avail_actions":
                spaces.Box(0, 1, shape=per_pm_shape),
                "state":
                spaces.Box(0, 1, shape=(pm_count + 1, 3)),
            })
        self.reset()

Ejemplo n.º 4

0

Mostrar archivo

Archivo: knapsack.py Proyecto: toshakyamanoj/or-gym

    def __init__(self, *args, **kwargs):
        """Initialise on top of ``BoundedKnapsackEnv`` and build the spaces.

        Positional arguments are accepted but not used in this method.
        ``kwargs`` is forwarded to ``assign_env_config`` (defined elsewhere).

        The ``step_limit`` default (50) is assigned *before* the config is
        applied. Previously it was assigned afterwards, which silently
        overwrote any ``step_limit`` supplied by the caller.
        """
        BoundedKnapsackEnv.__init__(self)
        self.item_weights = np.random.randint(1, 100, size=self.N)
        self.item_values = np.random.randint(0, 100, size=self.N)
        # Default first, config second, so a configured step_limit wins.
        self.step_limit = 50
        assign_env_config(self, kwargs)
        self.action_space = spaces.Discrete(2)

        obs_space = spaces.Box(0, self.max_weight, shape=(4, ))
        if self.mask:
            self.observation_space = spaces.Dict({
                'state':
                obs_space,
                'avail_actions':
                spaces.Box(0, 1, shape=(2, )),
                'action_mask':
                spaces.Box(0, 1, shape=(2, ))
            })
        else:
            self.observation_space = obs_space

        self.step_counter = 0

        self.state = self.reset()
        # NOTE(review): assigned after reset() and after the config, so a
        # configured ``_max_reward`` is overwritten here. Left in place because
        # whether reset() depends on the earlier value is not visible here.
        self._max_reward = 600

Ejemplo n.º 5

0

Mostrar archivo

Archivo: knapsack.py Proyecto: stephanesbizzera/or-gym

 def __init__(self, *args, **kwargs):
     """Assign defaults, apply the config, seed, and build the spaces.

     Positional arguments are accepted but not used in this method.
     ``kwargs`` is forwarded to ``assign_env_config`` (defined elsewhere).
     """
     # Default problem data.
     # NOTE(review): the random draws below happen before set_seed() is
     # called further down -- confirm this gives the intended reproducibility.
     self.N = 200
     self.max_weight = 200
     self.current_weight = 0
     self._max_reward = 10000
     self.mask = True
     self.seed = 0
     self.item_numbers = np.arange(self.N)
     self.item_weights = np.random.randint(1, 100, size=self.N)
     self.item_values = np.random.randint(0, 100, size=self.N)
     self.over_packed_penalty = 0
     self.randomize_params_on_reset = False

     # Apply env_config overrides, if any, then seed.
     assign_env_config(self, kwargs)
     self.set_seed()

     flat_state_box = spaces.Box(0,
                                 self.max_weight,
                                 shape=(2*self.N + 1,),
                                 dtype=np.int16)
     self.action_space = spaces.Discrete(self.N)
     if not self.mask:
         # NOTE(review): this shape (2, N + 1) differs from the flat
         # (2N + 1,) state box used in the masked branch -- confirm.
         self.observation_space = spaces.Box(0,
                                             self.max_weight,
                                             shape=(2, self.N + 1),
                                             dtype=np.int16)
     else:
         per_item_shape = (self.N,)
         self.observation_space = spaces.Dict({
             "action_mask": spaces.Box(0, 1, shape=per_item_shape),
             "avail_actions": spaces.Box(0, 1, shape=per_item_shape),
             "state": flat_state_box,
             })

     self.reset()

Ejemplo n.º 6

0

Mostrar archivo

    def __init__(self, *args, **kwargs):
        """Assign defaults, apply the config and derive sizes and spaces.

        Positional arguments are accepted but not used in this method.
        ``kwargs`` is forwarded to ``utils.assign_env_config`` (defined
        elsewhere).
        """
        self.N = 50
        self.move_cost = -1
        self.invalid_action_cost = -100
        self.mask = False
        utils.assign_env_config(self, kwargs)

        # Everything below is derived from the (possibly overridden) N.
        node_count = self.N
        self.nodes = np.arange(node_count)
        self.step_limit = 2 * node_count
        self.obs_dim = 1 + node_count**2

        state_box = spaces.Box(-1,
                               node_count,
                               shape=(self.obs_dim, ),
                               dtype=np.int32)
        if not self.mask:
            self.observation_space = state_box
        else:
            per_node_shape = (node_count, )
            self.observation_space = spaces.Dict({
                "action_mask":
                spaces.Box(0, 1, shape=per_node_shape, dtype=np.int8),
                "avail_actions":
                spaces.Box(0, 1, shape=per_node_shape, dtype=np.int8),
                "state":
                state_box,
            })
        self.action_space = spaces.Discrete(node_count)

        self.reset()

Ejemplo n.º 7

0

Mostrar archivo

    def __init__(self, *args, **kwargs):
        """Assign defaults, apply the config, build geometry and spaces.

        Positional arguments are accepted but not used in this method.
        ``kwargs`` is forwarded to ``utils.assign_env_config`` (defined
        elsewhere).
        """
        self.N = 50
        self.invalid_action_cost = -100
        self.mask = False
        utils.assign_env_config(self, kwargs)

        # Node indices first, then coordinates, then the distance matrix;
        # the two helpers are defined elsewhere on the class.
        self.nodes = np.arange(self.N)
        self.coords = self._generate_coordinates()
        self.distance_matrix = self._get_distance_matrix()

        node_count = self.N
        self.obs_dim = 1 + node_count
        state_box = spaces.Box(-1,
                               node_count,
                               shape=(self.obs_dim, ),
                               dtype=np.int32)
        if not self.mask:
            self.observation_space = state_box
        else:
            per_node_shape = (node_count, )
            self.observation_space = spaces.Dict({
                "action_mask":
                spaces.Box(0, 1, shape=per_node_shape, dtype=np.int8),
                "avail_actions":
                spaces.Box(0, 1, shape=per_node_shape, dtype=np.int8),
                "state":
                state_box,
            })

        self.action_space = spaces.Discrete(node_count)

        self.reset()

Ejemplo n.º 8

0

Mostrar archivo

    def __init__(self, *args, **kwargs):
        """Assign defaults, apply the config and build bounds and spaces.

        Positional arguments are accepted but not used in this method.
        ``kwargs`` is forwarded to ``assign_env_config`` (defined elsewhere).
        """
        # --- defaults -----------------------------------------------------
        self.n_restaurants = 2
        self.max_orders = 10
        self.order_prob = 0.5
        self.vehicle_capacity = 4
        self.grid = (5, 5)
        self.order_promise = 60
        self.order_timeout_prob = 0.15
        self.num_zones = 4
        self.order_probs_per_zone = [0.1, 0.5, 0.3, 0.1]
        self.order_reward_min = [8, 5, 2, 1]
        self.order_reward_max = [12, 8, 5, 3]
        self.half_norm_scale_reward_per_zone = [0.5, 0.5, 0.5, 0.5]
        self.penalty_per_timestep = 0.1
        self.penalty_per_move = 0.1
        self.order_miss_penalty = 50
        self.step_limit = 1000
        self.mask = False
        self.info = {}

        assign_env_config(self, kwargs)

        # --- derived quantities ----------------------------------------------
        self._order_nums = np.arange(self.max_orders)
        # Every (x, y) cell of the grid, x-major.
        cells = []
        for x in range(self.grid[0]):
            for y in range(self.grid[1]):
                cells.append((x, y))
        self.loc_permutations = cells
        self.action_dim = 1 + 3 * self.max_orders + self.n_restaurants
        self.obs_dim = 2 * self.n_restaurants + 4 + 6 * self.max_orders

        # --- observation bounds ----------------------------------------------
        box_low = np.zeros(self.obs_dim)
        # Location entries, each capped by the larger grid dimension.
        location_caps = np.repeat(max(self.grid), 2 * self.n_restaurants + 2)
        # Two vehicle-capacity entries.
        capacity_caps = np.repeat(self.vehicle_capacity, 2)
        # Upper bounds for one order, repeated once per order slot.
        single_order_caps = np.hstack([
            4, self.n_restaurants, self.grid, self.order_promise,
            max(self.order_reward_max)
        ])
        order_caps = np.tile(single_order_caps, self.max_orders)
        box_high = np.hstack([location_caps, capacity_caps, order_caps])

        # --- spaces ------------------------------------------------------------
        if not self.mask:
            self.observation_space = spaces.Box(low=box_low,
                                                high=box_high,
                                                dtype=np.float16)
        else:
            self.observation_space = spaces.Dict({
                'action_mask':
                spaces.Box(low=np.zeros(self.action_dim),
                           high=np.ones(self.action_dim),
                           dtype=np.uint8),
                'avail_actions':
                spaces.Box(low=np.zeros(self.action_dim),
                           high=np.ones(self.action_dim),
                           dtype=np.uint8),
                'state':
                spaces.Box(low=box_low, high=box_high, dtype=np.float16),
            })

        self.action_space = spaces.Discrete(self.action_dim)

        self.reset()

Ejemplo n.º 9

0

Mostrar archivo

Archivo: binpacking.py Proyecto: stephanesbizzera/or-gym

 def __init__(self, *args, **kwargs):
     """Run the parent initialiser, then apply this variant's settings.

     Positional arguments are accepted but not used in this method.
     ``kwargs`` is forwarded to ``assign_env_config`` (defined elsewhere).
     """
     super().__init__()
     # Two equally weighted entries.
     self.item_probs = [0.5] * 2
     assign_env_config(self, kwargs)
     # Helpers defined elsewhere on the class; call order is preserved.
     self._build_obs_space()
     self._check_settings()
     self.seed()
     initial_state = self.reset()
     self.state = initial_state

Ejemplo n.º 10

0

Mostrar archivo

Archivo: binpacking.py Proyecto: stephanesbizzera/or-gym

 def __init__(self, *args, **kwargs):
     """Run the parent initialiser, then apply this variant's settings.

     Positional arguments are accepted but not used in this method.
     ``kwargs`` is forwarded to ``assign_env_config`` (defined elsewhere).
     """
     super().__init__()
     self.bin_capacity = 100
     # Nine entries, all zero except index 3 (1/3) and index 8 (2/3).
     probs = [0] * 9
     probs[3] = 1 / 3
     probs[8] = 2 / 3
     self.item_probs = probs
     self.item_sizes = np.arange(1, 10)
     self.step_limit = 1000
     assign_env_config(self, kwargs)
     # Helpers defined elsewhere on the class; call order is preserved.
     self._build_obs_space()
     self._check_settings()
     self.seed()
     initial_state = self.reset()
     self.state = initial_state

Ejemplo n.º 11

0

Mostrar archivo

Archivo: binpacking.py Proyecto: stephanesbizzera/or-gym

 def __init__(self, *args, **kwargs):
     """Assign defaults, apply the config, then build, check, seed, reset.

     Positional arguments are accepted but not used in this method.
     ``kwargs`` is forwarded to ``assign_env_config`` (defined elsewhere).
     """
     # Defaults.
     self.bin_capacity = 9
     self.item_sizes, self.item_probs = [2, 3], [0.8, 0.2]
     self.step_count = 0
     self.step_limit = 100
     self.mask = False
     assign_env_config(self, kwargs)
     # Helpers defined elsewhere on the class; call order is preserved.
     self._build_obs_space()
     self._check_settings()
     self.seed()
     initial_state = self.reset()
     self.state = initial_state

Ejemplo n.º 12

0

Mostrar archivo

Archivo: inventory_management.py Proyecto: grossmann-group/or-gym

    def __init__(self, *args, **kwargs):
        '''
        Build the default supply network, apply the configuration, validate
        the inputs and create the action and observation spaces.

        num_periods = number of periods in simulation.
        Node specific parameters:
            - I0 = initial inventory.
            - C = production capacity.
            - v = production yield in the range (0, 1].
            - o = unit operating cost (feed-based)
            - h = unit holding cost for excess on-hand inventory.
        Edge specific parameters:
            - L = lead times in between adjacent nodes.
            - p = unit price to send material between adjacent nodes (purchase price/reorder cost)
            - b = unit backlog cost or goodwill loss for unfulfilled market demand between adjacent retailer and market.
            - g = unit holding cost for pipeline inventory on a specified edge.
            - prob_dist = probability distribution function on a (retailer, market) edge.
            - demand_dist = demand distribution for (retailer, market) edge. Two options:
                - use scipy probability distribution: must be a lambda function calling the rvs method of the distribution
                    i.e. lambda: poisson.rvs(mu=20)
                - use a list of user specified demands for each period.
        backlog = Are unfulfilled orders backlogged? True = backlogged, False = lost sales.
        demand_dist = distribution function for customer demand (e.g. poisson, binomial, uniform, geometric, etc.)
        dist_param = named values for parameters fed to statistical distribution.
            poisson: {'mu': <mean value>}
            binom: {'n': <mean value>,
                    'p': <probability between 0 and 1 of getting the mean value>}
            randint: {'low': <lower bound>, 'high': <upper bound>}
            geom: {'p': <probability. Outcome is the number of trials to success>}
        alpha = discount factor in the range (0,1] that accounts for the time value of money
        seed_int = integer seed for random state.
        user_D = dictionary with lists of user specified demand at each time period for each (retail, market) pair.
            If lists are all zeros, ignored; otherwise, demands will be taken from this list.
        sample_path = dictionary with booleans specifying if the user_D on the same (retail, market) key is sampled from demand_dist.
            If true, then the average demand used in the LP model is calculated from the demand_dist; otherwise, it is
            taken from the user_D.
        '''
        # set default (arbitrary) values when creating environment (if no args or kwargs are given)
        self._max_rewards = 2000
        self.num_periods = 30
        self.backlog = True
        self.alpha = 1.00
        self.seed_int = 0
        self.user_D = {(1, 0): np.zeros(self.num_periods)}
        self.sample_path = {(1, 0): False}
        # NOTE(review): duplicate of the assignment at the top of this block (same value).
        self._max_rewards = 2000

        # create graph
        self.graph = nx.DiGraph()
        # Market
        self.graph.add_nodes_from([0])
        # Retailer
        self.graph.add_nodes_from([1], I0=100, h=0.030)
        # Distributors
        self.graph.add_nodes_from([2], I0=110, h=0.020)
        self.graph.add_nodes_from([3], I0=80, h=0.015)
        # Manufacturers
        self.graph.add_nodes_from([4], I0=400, C=90, o=0.010, v=1.000, h=0.012)
        self.graph.add_nodes_from([5], I0=350, C=90, o=0.015, v=1.000, h=0.013)
        self.graph.add_nodes_from([6], I0=380, C=80, o=0.012, v=1.000, h=0.011)
        # Raw materials
        self.graph.add_nodes_from([7, 8])
        # Links: the (1, 0) edge carries the demand distribution; every other
        # edge carries a lead time 'L', a price 'p' and a pipeline holding cost 'g'.
        self.graph.add_edges_from([(1, 0, {
            'p': 2.000,
            'b': 0.100,
            'demand_dist': poisson,
            'dist_param': {
                'mu': 20
            }
        }), (2, 1, {
            'L': 5,
            'p': 1.500,
            'g': 0.010
        }), (3, 1, {
            'L': 3,
            'p': 1.600,
            'g': 0.015
        }), (4, 2, {
            'L': 8,
            'p': 1.000,
            'g': 0.008
        }), (4, 3, {
            'L': 10,
            'p': 0.800,
            'g': 0.006
        }), (5, 2, {
            'L': 9,
            'p': 0.700,
            'g': 0.005
        }), (6, 2, {
            'L': 11,
            'p': 0.750,
            'g': 0.007
        }), (6, 3, {
            'L': 12,
            'p': 0.800,
            'g': 0.004
        }), (7, 4, {
            'L': 0,
            'p': 0.150,
            'g': 0.000
        }), (7, 5, {
            'L': 1,
            'p': 0.050,
            'g': 0.005
        }), (8, 5, {
            'L': 2,
            'p': 0.070,
            'g': 0.002
        }), (8, 6, {
            'L': 0,
            'p': 0.200,
            'g': 0.000
        })])

        # add environment configuration dictionary and keyword arguments
        # (assign_env_config is defined elsewhere; it runs after the defaults
        # and the default graph have been created)
        assign_env_config(self, kwargs)

        # save user_D and sample_path to graph metadata (as edge attributes)
        for link in self.user_D.keys():
            d = self.user_D[link]
            self.graph.edges[link]['user_D'] = d
            if link in self.sample_path.keys():
                self.graph.edges[link]['sample_path'] = self.sample_path[link]

        #  parameters
        self.num_nodes = self.graph.number_of_nodes()
        self.adjacency_matrix = np.vstack(self.graph.edges())
        # Set node levels: start from node 1 as the retailer and walk
        # upstream one level at a time through the graph predecessors.
        self.levels = {}
        self.levels['retailer'] = np.array([1])
        self.levels['distributor'] = np.unique(
            np.hstack([
                list(self.graph.predecessors(i))
                for i in self.levels['retailer']
            ]))
        self.levels['manufacturer'] = np.unique(
            np.hstack([
                list(self.graph.predecessors(i))
                for i in self.levels['distributor']
            ]))
        self.levels['raw_materials'] = np.unique(
            np.hstack([
                list(self.graph.predecessors(i))
                for i in self.levels['manufacturer']
            ]))

        # Column index assigned to each level name.
        self.level_col = {
            'retailer': 0,
            'distributor': 1,
            'manufacturer': 2,
            'raw_materials': 3
        }

        # This set-up doesn't work with a broad network
        # market: nodes with no successors
        self.market = [
            j for j in self.graph.nodes()
            if len(list(self.graph.successors(j))) == 0
        ]
        # distrib: nodes with an initial inventory 'I0' but no capacity 'C'
        self.distrib = [
            j for j in self.graph.nodes()
            if 'C' not in self.graph.nodes[j] and 'I0' in self.graph.nodes[j]
        ]
        # retail: nodes with at least one market node among their successors
        self.retail = [
            j for j in self.graph.nodes() if len(
                set.intersection(set(self.graph.successors(j)), set(
                    self.market))) > 0
        ]
        # factory: nodes with a production capacity 'C'
        self.factory = [
            j for j in self.graph.nodes() if 'C' in self.graph.nodes[j]
        ]
        # rawmat: nodes with no predecessors
        self.rawmat = [
            j for j in self.graph.nodes()
            if len(list(self.graph.predecessors(j))) == 0
        ]
        self.main_nodes = np.sort(self.distrib + self.factory)
        self.reorder_links = [
            e for e in self.graph.edges() if 'L' in self.graph.edges[e]
        ]  #exclude links to markets (these cannot have lead time 'L')
        self.retail_links = [
            e for e in self.graph.edges() if 'L' not in self.graph.edges[e]
        ]  #links joining retailers to markets
        self.network_links = [e for e in self.graph.edges()
                              ]  #all links involved in sale in the network

        # check inputs
        # NOTE(review): validation uses assert statements, which are skipped
        # when Python runs with -O.
        assert set(self.graph.nodes()) == set.union(
            set(self.market), set(self.distrib), set(self.factory),
            set(self.rawmat)
        ), "The union of market, distribution, factory, and raw material nodes is not equal to the system nodes."
        # per-node attribute checks (only for attributes that are present)
        for j in self.graph.nodes():
            if 'I0' in self.graph.nodes[j]:
                assert self.graph.nodes[j][
                    'I0'] >= 0, "The initial inventory cannot be negative for node {}.".format(
                        j)
            if 'h' in self.graph.nodes[j]:
                assert self.graph.nodes[j][
                    'h'] >= 0, "The inventory holding costs cannot be negative for node {}.".format(
                        j)
            if 'C' in self.graph.nodes[j]:
                assert self.graph.nodes[j][
                    'C'] > 0, "The production capacity must be positive for node {}.".format(
                        j)
            if 'o' in self.graph.nodes[j]:
                assert self.graph.nodes[j][
                    'o'] >= 0, "The operating costs cannot be negative for node {}.".format(
                        j)
            if 'v' in self.graph.nodes[j]:
                assert self.graph.nodes[j]['v'] > 0 and self.graph.nodes[j][
                    'v'] <= 1, "The production yield must be in the range (0, 1] for node {}.".format(
                        j)
        # per-edge attribute checks (only for attributes that are present)
        for e in self.graph.edges():
            if 'L' in self.graph.edges[e]:
                assert self.graph.edges[e][
                    'L'] >= 0, "The lead time joining nodes {} cannot be negative.".format(
                        e)
            if 'p' in self.graph.edges[e]:
                assert self.graph.edges[e][
                    'p'] >= 0, "The sales price joining nodes {} cannot be negative.".format(
                        e)
            if 'b' in self.graph.edges[e]:
                assert self.graph.edges[e][
                    'b'] >= 0, "The unfulfilled demand costs joining nodes {} cannot be negative.".format(
                        e)
            if 'g' in self.graph.edges[e]:
                assert self.graph.edges[e][
                    'g'] >= 0, "The pipeline inventory holding costs joining nodes {} cannot be negative.".format(
                        e)
            if 'user_D' in self.graph.edges[e]:
                assert len(
                    self.graph.edges[e]['user_D']
                ) == self.num_periods, "The user specified demand joining (retailer, market): {} must be of length {}.".format(
                    e, self.num_periods)
            if 'sample_path' in self.graph.edges[e]:
                assert isinstance(
                    self.graph.edges[e]['sample_path'], bool
                ), "When specifying if a user specified demand joining (retailer, market): {} is sampled from a distribution, sample_path must be a Boolean.".format(
                    e)
            if 'demand_dist' in self.graph.edges[e]:
                dist = self.graph.edges[e][
                    'demand_dist']  #extract distribution
                # NOTE(review): this asserts the truthiness of cdf(0, ...); a
                # distribution whose cdf at 0 is exactly 0 would fail here even
                # with valid parameters -- confirm the intent.
                assert dist.cdf(
                    0, **self.graph.edges[e]['dist_param']
                ), "Wrong parameters passed to the demand distribution joining (retailer, market): {}.".format(
                    e)
        assert self.backlog == False or self.backlog == True, "The backlog parameter must be a boolean."
        assert self.graph.number_of_nodes(
        ) >= 2, "The minimum number of nodes is 2. Please try again"
        assert self.alpha > 0 and self.alpha <= 1, "alpha must be in the range (0, 1]."

        # set random generation seed (unless using user demands)
        self.seed(self.seed_int)

        # action space (reorder quantities for each node for each supplier; list)
        # An action is defined for every node
        num_reorder_links = len(self.reorder_links)
        # largest lead time over all edges that define 'L'
        self.lt_max = np.max([
            self.graph.edges[e]['L'] for e in self.graph.edges()
            if 'L' in self.graph.edges[e]
        ])
        # largest initial inventory over all nodes that define 'I0'
        self.init_inv_max = np.max([
            self.graph.nodes[j]['I0'] for j in self.graph.nodes()
            if 'I0' in self.graph.nodes[j]
        ])
        # largest production capacity over all nodes that define 'C'
        self.capacity_max = np.max([
            self.graph.nodes[j]['C'] for j in self.graph.nodes()
            if 'C' in self.graph.nodes[j]
        ])
        # sum of the lead times over all edges that define 'L'
        self.pipeline_length = sum([
            self.graph.edges[e]['L'] for e in self.graph.edges()
            if 'L' in self.graph.edges[e]
        ])
        # edge -> lead time, for edges that define 'L'
        self.lead_times = {
            e: self.graph.edges[e]['L']
            for e in self.graph.edges() if 'L' in self.graph.edges[e]
        }
        # observation length: pipeline slots + main nodes + retail links
        self.obs_dim = self.pipeline_length + len(self.main_nodes) + len(
            self.retail_links)
        # self.pipeline_length = len(self.main_nodes)*(self.lt_max+1)
        # one action entry per reorder link, bounded above by
        # init_inv_max + capacity_max * num_periods
        self.action_space = gym.spaces.Box(
            low=np.zeros(num_reorder_links),
            high=np.ones(num_reorder_links) *
            (self.init_inv_max + self.capacity_max * self.num_periods),
            dtype=np.int32)
        # observation space (total inventory at each node, which is any integer value)
        self.observation_space = gym.spaces.Box(
            low=np.ones(self.obs_dim) * np.iinfo(np.int32).min,
            high=np.ones(self.obs_dim) * np.iinfo(np.int32).max,
            dtype=np.int32)
        # low=-np.ones(self.pipeline_length)*(self.init_inv_max + self.capacity_max*self.num_periods)*10,
        # high=np.ones(self.pipeline_length)*(self.init_inv_max + self.capacity_max*self.num_periods),
        # dtype=np.int32)

        # initialize
        self.reset()

Ejemplo n.º 13

0

Mostrar archivo

Archivo: newsvendor.py Proyecto: grossmann-group/or-gym

    def __init__(self, *args, **kwargs):
        '''
        Assign defaults, apply the configuration, validate the inputs, reset
        the environment and create the action and observation spaces.

        periods = [positive integer] number of periods in simulation.
        I0 = [non-negative integer; dimension |Stages|-1] initial inventories for each stage.
        p = [positive float] unit price for final product.
        r = [non-negative float; dimension |Stages|] unit cost for replenishment orders at each stage.
        k = [non-negative float; dimension |Stages|] backlog cost or goodwill loss (per unit) for unfulfilled orders (demand or replenishment orders).
        h = [non-negative float; dimension |Stages|-1] unit holding cost for excess on-hand inventory at each stage.
            (Note: does not include pipeline inventory).
        c = [positive integer; dimension |Stages|-1] production capacities for each supplier (stages 1 through |Stage|).
        L = [non-negative integer; dimension |Stages|-1] lead times in between stages.
        backlog = [boolean] are unfulfilled orders backlogged? True = backlogged, False = lost sales.
        dist = [integer] value between 1 and 5. Specifies distribution for customer demand.
            1: poisson distribution
            2: binomial distribution
            3: uniform random integer
            4: geometric distribution
            5: user supplied demand values
        dist_param = [dictionary] named values for parameters fed to statistical distribution.
            poisson: {'mu': <mean value>}
            binom: {'n': <mean value>, 'p': <probability between 0 and 1 of getting the mean value>}
            randint: {'low': <lower bound>, 'high': <upper bound>}
            geom: {'p': <probability. Outcome is the number of trials to success>}
        alpha = [float in range (0,1]] discount factor to account for the time value of money
        seed_int = [integer] seed for random state.
        user_D = [list] user specified demand for each time period in simulation
        '''
        # set default (arbitrary) values when creating environment (if no args or kwargs are given)
        self.periods = 30
        self.I0 = [100, 100, 200]
        self.p = 2
        self.r = [1.5, 1.0, 0.75, 0.5]
        self.k = [0.10, 0.075, 0.05, 0.025]
        self.h = [0.15, 0.10, 0.05]
        self.c = [100, 90, 80]
        self.L = [3, 5, 10]
        self.backlog = True
        self.dist = 1
        self.dist_param = {'mu': 20}
        self.alpha = 0.97
        self.seed_int = 0
        self.user_D = np.zeros(self.periods)
        self._max_rewards = 2000
        
        # add environment configuration dictionary and keyword arguments
        assign_env_config(self, kwargs)
        
        # input parameters
        # I0, c and L may be given as a scalar: list() on a non-iterable
        # raises TypeError, in which case the value is wrapped in a list.
        try:
            self.init_inv = np.array(list(self.I0))
        except TypeError:
            self.init_inv = np.array([self.I0])
        self.num_periods = self.periods
        self.unit_price = np.append(self.p,self.r[:-1]) # cost to stage 1 is price to stage 2
        self.unit_cost = np.array(self.r)
        self.demand_cost = np.array(self.k)
        self.holding_cost = np.append(self.h,0) # holding cost at last stage is 0
        try:
            self.supply_capacity = np.array(list(self.c))
        except TypeError:
            self.supply_capacity = np.array([self.c])
        try:
            self.lead_time = np.array(list(self.L))
        except TypeError:
            self.lead_time = np.array([self.L])
        self.discount = self.alpha
        self.user_D = np.array(list(self.user_D))
        self.num_stages = len(self.init_inv) + 1
        m = self.num_stages
        lt_max = self.lead_time.max()
        
        #  parameters
        #  dictionary with options for demand distributions
        distributions = {1:poisson,
                         2:binom,
                         3:randint,
                         4:geom,
                         5:self.user_D}

        # check inputs
        # The comparison must sit inside np.all(); `np.all(x) >= 0` compares a
        # boolean with 0 and is always true, so it never caught negatives.
        assert np.all(self.init_inv >= 0), "The initial inventory cannot be negative"
        # A non-comparable num_periods is printed rather than raised here.
        try:
            assert self.num_periods > 0, "The number of periods must be positive. Num Periods = {}".format(self.num_periods)
        except TypeError:
            print('\n{}\n'.format(self.num_periods))
        assert np.all(self.unit_price >= 0), "The sales prices cannot be negative."
        assert np.all(self.unit_cost >= 0), "The procurement costs cannot be negative."
        assert np.all(self.demand_cost >= 0), "The unfulfilled demand costs cannot be negative."
        assert np.all(self.holding_cost >= 0), "The inventory holding costs cannot be negative."
        assert np.all(self.supply_capacity > 0), "The supply capacities must be positive."
        assert np.all(self.lead_time >= 0), "The lead times cannot be negative."
        assert (self.backlog == False) | (self.backlog == True), "The backlog parameter must be a boolean."
        assert m >= 2, "The minimum number of stages is 2. Please try again"
        assert len(self.unit_cost) == m, "The length of r is not equal to the number of stages."
        assert len(self.demand_cost) == m, "The length of k is not equal to the number of stages."
        # holding_cost has one extra trailing 0 appended above, hence == m.
        assert len(self.holding_cost) == m, "The length of h is not equal to the number of stages - 1."
        assert len(self.supply_capacity) == m-1, "The length of c is not equal to the number of stages - 1."
        assert len(self.lead_time) == m-1, "The length of L is not equal to the number of stages - 1."
        assert self.dist in [1,2,3,4,5], "dist must be one of 1, 2, 3, 4, 5."
        if self.dist < 5:
            # NOTE(review): asserts the truthiness of cdf(0, ...); a cdf of
            # exactly 0 at 0 would fail even with valid parameters -- confirm.
            assert distributions[self.dist].cdf(0,**self.dist_param), "Wrong parameters given for distribution."
        else:
            assert len(self.user_D) == self.num_periods, "The length of the user specified distribution is not equal to the number of periods."
        assert (self.alpha>0) & (self.alpha<=1), "alpha must be in the range (0,1]."
        
        # select distribution
        self.demand_dist = distributions[self.dist]  
        
        # set random generation seed (unless using user demands)
        if self.dist < 5:
            self.seed(self.seed_int)

        # initialize
        self.reset()
        
        # action space (reorder quantities for each stage; list)
        # An action is defined for every stage (except last one)
        # self.action_space = gym.spaces.Tuple(tuple(
            # [gym.spaces.Box(0, i, shape=(1,)) for i in self.supply_capacity]))
        self.pipeline_length = (m-1)*(lt_max+1)
        self.action_space = gym.spaces.Box(
            low=np.zeros(m-1), high=self.supply_capacity, dtype=np.int16)
        # observation space (Inventory position at each echelon, which is any integer value)
        self.observation_space = gym.spaces.Box(
            low=-np.ones(self.pipeline_length)*self.supply_capacity.max()*self.num_periods*10,
            high=np.ones(self.pipeline_length)*self.supply_capacity.max()*self.num_periods, dtype=np.int32)