def __init__(self, *args, **kwargs):
    '''
    Set the default parameters, apply the supplied configuration and
    build the observation and action spaces before resetting.

    Positional arguments are accepted but not used; ``kwargs`` is handed
    to ``assign_env_config`` after the defaults have been assigned.
    '''
    # Default problem dimensions.
    self.lead_time = 5
    self.max_inventory = 4000
    self.max_order_quantity = 2000
    self.step_limit = 40

    # Upper limits used for the observation bounds below.
    self.p_max = 100   # Max sale price
    self.h_max = 5     # Max holding cost
    self.k_max = 10    # Max lost sales penalty
    self.mu_max = 200  # Max mean of the demand distribution
    self.gamma = 1     # Discount factor

    assign_env_config(self, kwargs)

    # Five scalar entries followed by one entry per lead-time period.
    self.obs_dim = self.lead_time + 5
    scalar_bounds = [
        self.p_max, self.p_max, self.h_max, self.k_max, self.mu_max
    ]
    pipeline_bounds = [self.max_order_quantity] * self.lead_time
    obs_low = np.zeros(self.obs_dim)
    obs_high = np.array(scalar_bounds + pipeline_bounds)
    self.observation_space = spaces.Box(
        low=obs_low, high=obs_high, dtype=np.float32)

    # A single continuous order quantity in [0, max_order_quantity].
    order_low = np.zeros(1)
    order_high = np.array([self.max_order_quantity])
    self.action_space = spaces.Box(
        low=order_low, high=order_high, dtype=np.float32)

    self.reset()
def __init__(self, *args, **kwargs):
    '''
    Draw per-item limits, run the parent initializer, redraw the item
    weights and values, apply the configuration and build the
    observation space.

    The order of the ``np.random`` draws and of the parent call is kept
    as-is because each consumes global random state.
    NOTE(review): ``self.max_weight`` and ``self.mask`` are not assigned
    here; presumably they come from the parent initializer or the
    config -- confirm.
    '''
    self.N = 200
    self.item_limits_init = np.random.randint(1, 10, size=self.N)
    self.item_limits = self.item_limits_init.copy()

    super().__init__()

    self.item_weights = np.random.randint(1, 100, size=self.N)
    self.item_values = np.random.randint(0, 100, size=self.N)
    assign_env_config(self, kwargs)

    state_space = spaces.Box(
        0, self.max_weight, shape=(3, self.N + 1), dtype=np.int32)

    if not self.mask:
        self.observation_space = state_space
        return

    # Masked variant: the state is wrapped together with two 0/1 boxes,
    # each with one entry per item limit.
    self.observation_space = spaces.Dict({
        "action_mask": spaces.Box(0, 1, shape=(len(self.item_limits), )),
        "avail_actions": spaces.Box(0, 1, shape=(len(self.item_limits), )),
        "state": state_space,
    })
def __init__(self, *args, **kwargs):
    '''
    Assign default capacities and episode settings, apply the supplied
    configuration, define the action/observation spaces and reset.
    '''
    self.cpu_capacity = 1
    self.mem_capacity = 1
    self.t_interval = 20
    self.tol = 1e-5
    # Number of intervals of length t_interval in 60 * 24 time units.
    self.step_limit = int(60 * 24 / self.t_interval)
    self.n_pms = 50
    self.load_idx = np.array([1, 2])
    self.seed = 0
    self.mask = True

    assign_env_config(self, kwargs)

    # One discrete action per entry counted by n_pms.
    self.action_space = spaces.Discrete(self.n_pms)

    mask_shape = (self.n_pms, )
    state_shape = (self.n_pms + 1, 3)
    if not self.mask:
        self.observation_space = spaces.Box(0, 1, shape=state_shape)
    else:
        self.observation_space = spaces.Dict({
            "action_mask": spaces.Box(0, 1, shape=mask_shape),
            "avail_actions": spaces.Box(0, 1, shape=mask_shape),
            "state": spaces.Box(0, 1, shape=state_shape),
        })

    self.reset()
def __init__(self, *args, **kwargs):
    '''
    Initialize through ``BoundedKnapsackEnv.__init__``, redraw the item
    weights and values, apply the configuration and define a two-action
    space with a four-element state.

    ``step_limit`` and ``_max_reward`` are assigned after
    ``assign_env_config`` runs, so the values set here are the ones left
    on the instance.
    '''
    BoundedKnapsackEnv.__init__(self)
    self.item_weights = np.random.randint(1, 100, size=self.N)
    self.item_values = np.random.randint(0, 100, size=self.N)
    assign_env_config(self, kwargs)

    self.action_space = spaces.Discrete(2)

    state_space = spaces.Box(0, self.max_weight, shape=(4, ))
    if self.mask:
        # Wrap the state together with two 0/1 boxes of length 2.
        masked_space = spaces.Dict({
            'state': state_space,
            'avail_actions': spaces.Box(0, 1, shape=(2, )),
            'action_mask': spaces.Box(0, 1, shape=(2, )),
        })
        self.observation_space = masked_space
    else:
        self.observation_space = state_space

    self.step_counter = 0
    self.step_limit = 50
    initial_state = self.reset()
    self.state = initial_state
    self._max_reward = 600
def __init__(self, *args, **kwargs):
    '''
    Assign default knapsack parameters, draw random item weights and
    values, apply the configuration, seed, and build the spaces.

    NOTE(review): the masked "state" box has shape (2*N + 1,) while the
    unmasked observation box has shape (2, N + 1); confirm which one
    matches the state actually produced by the environment.
    '''
    # Defaults (may be replaced by assign_env_config below).
    self.N = 200
    self.max_weight = 200
    self.current_weight = 0
    self._max_reward = 10000
    self.mask = True
    self.seed = 0

    # Item data.
    self.item_numbers = np.arange(self.N)
    self.item_weights = np.random.randint(1, 100, size=self.N)
    self.item_values = np.random.randint(0, 100, size=self.N)
    self.over_packed_penalty = 0
    self.randomize_params_on_reset = False

    # Add env_config, if any
    assign_env_config(self, kwargs)
    self.set_seed()

    flat_state_space = spaces.Box(
        0, self.max_weight, shape=(2*self.N + 1,), dtype=np.int16)
    self.action_space = spaces.Discrete(self.N)

    if not self.mask:
        self.observation_space = spaces.Box(
            0, self.max_weight, shape=(2, self.N + 1), dtype=np.int16)
    else:
        self.observation_space = spaces.Dict({
            "action_mask": spaces.Box(0, 1, shape=(self.N,)),
            "avail_actions": spaces.Box(0, 1, shape=(self.N,)),
            "state": flat_state_space,
        })

    self.reset()
def __init__(self, *args, **kwargs):
    '''
    Assign defaults for an N-node problem, apply the configuration,
    derive the step limit and observation size from N, build the spaces
    and reset.
    '''
    self.N = 50
    self.move_cost = -1
    self.invalid_action_cost = -100
    self.mask = False
    utils.assign_env_config(self, kwargs)

    # Quantities derived from the (possibly overridden) node count.
    self.nodes = np.arange(self.N)
    self.step_limit = 2 * self.N
    self.obs_dim = 1 + self.N**2

    state_space = spaces.Box(
        -1, self.N, shape=(self.obs_dim, ), dtype=np.int32)
    if not self.mask:
        self.observation_space = state_space
    else:
        per_node = (self.N, )
        self.observation_space = spaces.Dict({
            "action_mask": spaces.Box(0, 1, shape=per_node, dtype=np.int8),
            "avail_actions": spaces.Box(0, 1, shape=per_node, dtype=np.int8),
            "state": state_space,
        })

    # One discrete action per node.
    self.action_space = spaces.Discrete(self.N)
    self.reset()
def __init__(self, *args, **kwargs):
    '''
    Assign defaults for an N-node problem, apply the configuration,
    generate node coordinates and the distance matrix, build the spaces
    and reset.
    '''
    self.N = 50
    self.invalid_action_cost = -100
    self.mask = False
    utils.assign_env_config(self, kwargs)

    self.nodes = np.arange(self.N)
    # Coordinates are generated before the distance matrix is requested.
    self.coords = self._generate_coordinates()
    self.distance_matrix = self._get_distance_matrix()

    self.obs_dim = 1 + self.N
    state_space = spaces.Box(
        -1, self.N, shape=(self.obs_dim, ), dtype=np.int32)
    if not self.mask:
        self.observation_space = state_space
    else:
        per_node = (self.N, )
        self.observation_space = spaces.Dict({
            "action_mask": spaces.Box(0, 1, shape=per_node, dtype=np.int8),
            "avail_actions": spaces.Box(0, 1, shape=per_node, dtype=np.int8),
            "state": state_space,
        })

    # One discrete action per node.
    self.action_space = spaces.Discrete(self.N)
    self.reset()
def __init__(self, *args, **kwargs):
    '''
    Assign the default routing parameters, apply the configuration,
    derive the action/observation dimensions and bounds, build the
    spaces and reset.
    '''
    # Problem size and order settings.
    self.n_restaurants = 2
    self.max_orders = 10
    self.order_prob = 0.5
    self.vehicle_capacity = 4
    self.grid = (5, 5)
    self.order_promise = 60
    self.order_timeout_prob = 0.15

    # Per-zone settings.
    self.num_zones = 4
    self.order_probs_per_zone = [0.1, 0.5, 0.3, 0.1]
    self.order_reward_min = [8, 5, 2, 1]
    self.order_reward_max = [12, 8, 5, 3]
    self.half_norm_scale_reward_per_zone = [0.5, 0.5, 0.5, 0.5]

    # Penalties and episode settings.
    self.penalty_per_timestep = 0.1
    self.penalty_per_move = 0.1
    self.order_miss_penalty = 50
    self.step_limit = 1000
    self.mask = False
    self.info = {}

    assign_env_config(self, kwargs)

    self._order_nums = np.arange(self.max_orders)
    # Every (x, y) cell of the grid, x varying slowest.
    self.loc_permutations = [
        (x, y)
        for x in range(self.grid[0])
        for y in range(self.grid[1])
    ]
    self.action_dim = 1 + 3 * self.max_orders + self.n_restaurants
    self.obs_dim = 2 * self.n_restaurants + 4 + 6 * self.max_orders

    # Upper bounds, assembled segment by segment.
    location_high = np.repeat(
        max(self.grid), 2 * self.n_restaurants + 2)  # Locations 0-5
    capacity_high = np.repeat(
        self.vehicle_capacity, 2)  # Vehicle capacities 6-7
    single_order_high = np.hstack([
        4, self.n_restaurants, self.grid, self.order_promise,
        max(self.order_reward_max)
    ])
    orders_high = np.tile(single_order_high, self.max_orders)

    box_low = np.zeros(self.obs_dim)
    box_high = np.hstack([location_high, capacity_high, orders_high])

    def _flag_box():
        # 0/1 box with one entry per action.
        return spaces.Box(low=np.zeros(self.action_dim),
                          high=np.ones(self.action_dim),
                          dtype=np.uint8)

    if not self.mask:
        self.observation_space = spaces.Box(
            low=box_low, high=box_high, dtype=np.float16)
    else:
        self.observation_space = spaces.Dict({
            'action_mask': _flag_box(),
            'avail_actions': _flag_box(),
            'state': spaces.Box(low=box_low, high=box_high,
                                dtype=np.float16),
        })

    self.action_space = spaces.Discrete(self.action_dim)
    self.reset()
def __init__(self, *args, **kwargs):
    '''
    Run the parent initializer, replace the item probabilities, apply
    the configuration, then rebuild the observation space, check the
    settings, seed and reset.
    '''
    super().__init__()

    # Two item probabilities of 0.5 each replace whatever the parent set.
    self.item_probs = [0.5, 0.5]
    assign_env_config(self, kwargs)

    # The space is built before the settings check, and the seed call
    # comes before the reset; this order is kept unchanged.
    self._build_obs_space()
    self._check_settings()
    self.seed()

    initial_state = self.reset()
    self.state = initial_state
def __init__(self, *args, **kwargs):
    '''
    Run the parent initializer, set a bin capacity of 100 with item
    sizes 1..9, apply the configuration, then rebuild the observation
    space, check the settings, seed and reset.
    '''
    super().__init__()

    self.bin_capacity = 100
    # One probability per item size; only sizes 4 and 9 are non-zero.
    self.item_probs = [0, 0, 0, 1 / 3, 0, 0, 0, 0, 2 / 3]
    self.item_sizes = np.arange(1, 10)
    self.step_limit = 1000
    assign_env_config(self, kwargs)

    self._build_obs_space()
    self._check_settings()
    self.seed()

    initial_state = self.reset()
    self.state = initial_state
def __init__(self, *args, **kwargs):
    '''
    Assign the default bin-packing parameters, apply the configuration,
    then build the observation space, check the settings, seed and
    reset.
    '''
    # Defaults (may be replaced by assign_env_config below).
    self.bin_capacity = 9
    self.item_sizes = [2, 3]
    self.item_probs = [0.8, 0.2]
    self.step_count = 0
    self.step_limit = 100
    self.mask = False
    assign_env_config(self, kwargs)

    self._build_obs_space()
    self._check_settings()
    self.seed()

    initial_state = self.reset()
    self.state = initial_state
def __init__(self, *args, **kwargs):
    '''
    Build the default supply network, apply the configuration, validate
    the inputs, define the action/observation spaces and reset.

    num_periods = number of periods in simulation.
    Node specific parameters:
        - I0 = initial inventory.
        - C = production capacity.
        - v = production yield in the range (0, 1].
        - o = unit operating cost (feed-based)
        - h = unit holding cost for excess on-hand inventory.
    Edge specific parameters:
        - L = lead times in between adjacent nodes.
        - p = unit price to send material between adjacent nodes
              (purchase price/reorder cost)
        - b = unit backlog cost or goodwill loss for unfulfilled market
              demand between adjacent retailer and market.
        - g = unit holding cost for pipeline inventory on a specified edge.
        - demand_dist = demand distribution for a (retailer, market) edge.
              The default is a scipy.stats discrete distribution
              (poisson); it is validated here by calling its ``cdf``
              method with ``dist_param``.
        - dist_param = named values for parameters fed to the
              distribution, e.g.
                poisson: {'mu': <mean value>}
                binom: {'n': <number of trials>, 'p': <success probability>}
                randint: {'low': <lower bound>, 'high': <upper bound>}
                geom: {'p': <probability. Outcome is the number of
                       trials to success>}
    backlog = Are unfulfilled orders backlogged? True = backlogged,
        False = lost sales.
    alpha = discount factor in the range (0,1] that accounts for the
        time value of money
    seed_int = integer seed for random state.
    user_D = dictionary with lists of user specified demand at each time
        period for each (retail, market) pair. Each list must have
        length num_periods.
    sample_path = dictionary with booleans specifying if the user_D on
        the same (retail, market) key is sampled from demand_dist.

    Raises AssertionError when any of the input checks below fails.
    '''
    # set default (arbitrary) values when creating environment
    # (if no args or kwargs are given)
    self._max_rewards = 2000
    self.num_periods = 30
    self.backlog = True
    self.alpha = 1.00
    self.seed_int = 0
    self.user_D = {(1, 0): np.zeros(self.num_periods)}
    self.sample_path = {(1, 0): False}

    # create graph
    self.graph = nx.DiGraph()
    # Market
    self.graph.add_nodes_from([0])
    # Retailer
    self.graph.add_nodes_from([1], I0=100, h=0.030)
    # Distributors
    self.graph.add_nodes_from([2], I0=110, h=0.020)
    self.graph.add_nodes_from([3], I0=80, h=0.015)
    # Manufacturers
    self.graph.add_nodes_from([4], I0=400, C=90, o=0.010, v=1.000, h=0.012)
    self.graph.add_nodes_from([5], I0=350, C=90, o=0.015, v=1.000, h=0.013)
    self.graph.add_nodes_from([6], I0=380, C=80, o=0.012, v=1.000, h=0.011)
    # Raw materials
    self.graph.add_nodes_from([7, 8])
    # Links
    self.graph.add_edges_from([
        (1, 0, {'p': 2.000, 'b': 0.100,
                'demand_dist': poisson, 'dist_param': {'mu': 20}}),
        (2, 1, {'L': 5, 'p': 1.500, 'g': 0.010}),
        (3, 1, {'L': 3, 'p': 1.600, 'g': 0.015}),
        (4, 2, {'L': 8, 'p': 1.000, 'g': 0.008}),
        (4, 3, {'L': 10, 'p': 0.800, 'g': 0.006}),
        (5, 2, {'L': 9, 'p': 0.700, 'g': 0.005}),
        (6, 2, {'L': 11, 'p': 0.750, 'g': 0.007}),
        (6, 3, {'L': 12, 'p': 0.800, 'g': 0.004}),
        (7, 4, {'L': 0, 'p': 0.150, 'g': 0.000}),
        (7, 5, {'L': 1, 'p': 0.050, 'g': 0.005}),
        (8, 5, {'L': 2, 'p': 0.070, 'g': 0.002}),
        (8, 6, {'L': 0, 'p': 0.200, 'g': 0.000}),
    ])

    # add environment configuration dictionary and keyword arguments
    assign_env_config(self, kwargs)

    # save user_D and sample_path to graph metadata
    for link, demand in self.user_D.items():
        self.graph.edges[link]['user_D'] = demand
        if link in self.sample_path:
            self.graph.edges[link]['sample_path'] = self.sample_path[link]

    # parameters
    self.num_nodes = self.graph.number_of_nodes()
    self.adjacency_matrix = np.vstack(self.graph.edges())

    # Set node levels: each level is the set of predecessors of the
    # previous one, starting from retailer node 1.
    self.levels = {}
    self.levels['retailer'] = np.array([1])
    self.levels['distributor'] = np.unique(
        np.hstack([
            list(self.graph.predecessors(i))
            for i in self.levels['retailer']
        ]))
    self.levels['manufacturer'] = np.unique(
        np.hstack([
            list(self.graph.predecessors(i))
            for i in self.levels['distributor']
        ]))
    self.levels['raw_materials'] = np.unique(
        np.hstack([
            list(self.graph.predecessors(i))
            for i in self.levels['manufacturer']
        ]))
    self.level_col = {
        'retailer': 0,
        'distributor': 1,
        'manufacturer': 2,
        'raw_materials': 3
    }
    # This set-up doesn't work with a broad network

    # Classify nodes from the graph structure and node attributes.
    self.market = [
        j for j in self.graph.nodes()
        if len(list(self.graph.successors(j))) == 0
    ]
    self.distrib = [
        j for j in self.graph.nodes()
        if 'C' not in self.graph.nodes[j] and 'I0' in self.graph.nodes[j]
    ]
    market_set = set(self.market)
    self.retail = [
        j for j in self.graph.nodes()
        if len(set(self.graph.successors(j)) & market_set) > 0
    ]
    self.factory = [
        j for j in self.graph.nodes() if 'C' in self.graph.nodes[j]
    ]
    self.rawmat = [
        j for j in self.graph.nodes()
        if len(list(self.graph.predecessors(j))) == 0
    ]
    self.main_nodes = np.sort(self.distrib + self.factory)
    # exclude links to markets (these cannot have lead time 'L')
    self.reorder_links = [
        e for e in self.graph.edges() if 'L' in self.graph.edges[e]
    ]
    # links joining retailers to markets
    self.retail_links = [
        e for e in self.graph.edges() if 'L' not in self.graph.edges[e]
    ]
    # all links involved in sale in the network
    self.network_links = list(self.graph.edges())

    # check inputs
    assert set(self.graph.nodes()) == set.union(
        set(self.market), set(self.distrib), set(self.factory),
        set(self.rawmat)
    ), "The union of market, distribution, factory, and raw material nodes is not equal to the system nodes."

    for j in self.graph.nodes():
        node = self.graph.nodes[j]
        if 'I0' in node:
            assert node['I0'] >= 0, \
                "The initial inventory cannot be negative for node {}.".format(j)
        if 'h' in node:
            assert node['h'] >= 0, \
                "The inventory holding costs cannot be negative for node {}.".format(j)
        if 'C' in node:
            assert node['C'] > 0, \
                "The production capacity must be positive for node {}.".format(j)
        if 'o' in node:
            assert node['o'] >= 0, \
                "The operating costs cannot be negative for node {}.".format(j)
        if 'v' in node:
            assert node['v'] > 0 and node['v'] <= 1, \
                "The production yield must be in the range (0, 1] for node {}.".format(j)

    for e in self.graph.edges():
        edge = self.graph.edges[e]
        if 'L' in edge:
            assert edge['L'] >= 0, \
                "The lead time joining nodes {} cannot be negative.".format(e)
        if 'p' in edge:
            assert edge['p'] >= 0, \
                "The sales price joining nodes {} cannot be negative.".format(e)
        if 'b' in edge:
            assert edge['b'] >= 0, \
                "The unfulfilled demand costs joining nodes {} cannot be negative.".format(e)
        if 'g' in edge:
            assert edge['g'] >= 0, \
                "The pipeline inventory holding costs joining nodes {} cannot be negative.".format(e)
        if 'user_D' in edge:
            assert len(edge['user_D']) == self.num_periods, \
                "The user specified demand joining (retailer, market): {} must be of length {}.".format(
                    e, self.num_periods)
        if 'sample_path' in edge:
            assert isinstance(edge['sample_path'], bool), \
                "When specifying if a user specified demand joining (retailer, market): {} is sampled from a distribution, sample_path must be a Boolean.".format(e)
        if 'demand_dist' in edge:
            dist = edge['demand_dist']  # extract distribution
            # A valid parameterization yields a finite CDF value; the
            # value itself may legitimately be 0 (e.g. geom, or randint
            # with low > 0), so test finiteness rather than truthiness.
            # Invalid parameter values yield NaN and are rejected.
            assert np.isfinite(dist.cdf(0, **edge['dist_param'])), \
                "Wrong parameters passed to the demand distribution joining (retailer, market): {}.".format(e)

    assert self.backlog == False or self.backlog == True, \
        "The backlog parameter must be a boolean."
    assert self.graph.number_of_nodes() >= 2, \
        "The minimum number of nodes is 2. Please try again"
    assert self.alpha > 0 and self.alpha <= 1, \
        "alpha must be in the range (0, 1]."

    # set random generation seed (unless using user demands)
    self.seed(self.seed_int)

    # action space (reorder quantities for each node for each supplier; list)
    # An action is defined for every node
    num_reorder_links = len(self.reorder_links)
    self.lead_times = {
        e: self.graph.edges[e]['L']
        for e in self.graph.edges() if 'L' in self.graph.edges[e]
    }
    self.lt_max = np.max(list(self.lead_times.values()))
    self.init_inv_max = np.max([
        self.graph.nodes[j]['I0'] for j in self.graph.nodes()
        if 'I0' in self.graph.nodes[j]
    ])
    self.capacity_max = np.max([
        self.graph.nodes[j]['C'] for j in self.graph.nodes()
        if 'C' in self.graph.nodes[j]
    ])
    self.pipeline_length = sum(self.lead_times.values())
    self.obs_dim = self.pipeline_length + len(self.main_nodes) + len(
        self.retail_links)

    # Upper bound on any single reorder quantity.
    max_order = self.init_inv_max + self.capacity_max * self.num_periods
    self.action_space = gym.spaces.Box(
        low=np.zeros(num_reorder_links),
        high=np.ones(num_reorder_links) * max_order,
        dtype=np.int32)

    # observation space (total inventory at each node, which is any
    # integer value)
    int32_info = np.iinfo(np.int32)
    self.observation_space = gym.spaces.Box(
        low=np.ones(self.obs_dim) * int32_info.min,
        high=np.ones(self.obs_dim) * int32_info.max,
        dtype=np.int32)

    # initialize
    self.reset()
def __init__(self, *args, **kwargs):
    '''
    Assign defaults, apply the configuration, normalize and validate the
    inputs, reset, and define the action/observation spaces.

    periods = [positive integer] number of periods in simulation.
    I0 = [non-negative integer; dimension |Stages|-1] initial inventories
        for each stage.
    p = [positive float] unit price for final product.
    r = [non-negative float; dimension |Stages|] unit cost for
        replenishment orders at each stage.
    k = [non-negative float; dimension |Stages|] backlog cost or goodwill
        loss (per unit) for unfulfilled orders (demand or replenishment
        orders).
    h = [non-negative float; dimension |Stages|-1] unit holding cost for
        excess on-hand inventory at each stage.
        (Note: does not include pipeline inventory).
    c = [positive integer; dimension |Stages|-1] production capacities
        for each supplier (stages 1 through |Stage|).
    L = [non-negative integer; dimension |Stages|-1] lead times in
        between stages.
    backlog = [boolean] are unfulfilled orders backlogged?
        True = backlogged, False = lost sales.
    dist = [integer] value between 1 and 5. Specifies distribution for
        customer demand.
        1: poisson distribution
        2: binomial distribution
        3: uniform random integer
        4: geometric distribution
        5: user supplied demand values
    dist_param = [dictionary] named values for parameters fed to
        statistical distribution.
        poisson: {'mu': <mean value>}
        binom: {'n': <number of trials>, 'p': <success probability>}
        randint: {'low': <lower bound>, 'high': <upper bound>}
        geom: {'p': <probability. Outcome is the number of trials to
               success>}
    alpha = [float in range (0,1]] discount factor to account for the
        time value of money
    seed_int = [integer] seed for random state.
    user_D = [list] user specified demand for each time period in
        simulation

    Raises AssertionError when any of the input checks below fails.
    '''
    # set default (arbitrary) values when creating environment
    # (if no args or kwargs are given)
    self.periods = 30
    self.I0 = [100, 100, 200]
    self.p = 2
    self.r = [1.5, 1.0, 0.75, 0.5]
    self.k = [0.10, 0.075, 0.05, 0.025]
    self.h = [0.15, 0.10, 0.05]
    self.c = [100, 90, 80]
    self.L = [3, 5, 10]
    self.backlog = True
    self.dist = 1
    self.dist_param = {'mu': 20}
    self.alpha = 0.97
    self.seed_int = 0
    self.user_D = np.zeros(self.periods)
    self._max_rewards = 2000

    # add environment configuration dictionary and keyword arguments
    assign_env_config(self, kwargs)

    def _as_1d(value):
        # Accept either an iterable or a single scalar; a scalar is not
        # iterable, so list() raises TypeError and it is wrapped instead.
        try:
            return np.array(list(value))
        except TypeError:
            return np.array([value])

    # input parameters
    self.init_inv = _as_1d(self.I0)
    self.num_periods = self.periods
    # cost to stage 1 is price to stage 2
    self.unit_price = np.append(self.p, self.r[:-1])
    self.unit_cost = np.array(self.r)
    self.demand_cost = np.array(self.k)
    # holding cost at last stage is 0
    self.holding_cost = np.append(self.h, 0)
    self.supply_capacity = _as_1d(self.c)
    self.lead_time = _as_1d(self.L)
    self.discount = self.alpha
    self.user_D = np.array(list(self.user_D))
    self.num_stages = len(self.init_inv) + 1
    m = self.num_stages
    lt_max = self.lead_time.max()

    # parameters
    # dictionary with options for demand distributions
    distributions = {1: poisson,
                     2: binom,
                     3: randint,
                     4: geom,
                     5: self.user_D}

    # check inputs
    # The comparison must be element-wise inside np.all; comparing the
    # boolean result of np.all(...) with 0 is always true.
    assert np.all(self.init_inv >= 0), \
        "The initial inventory cannot be negative"
    try:
        assert self.num_periods > 0, \
            "The number of periods must be positive. Num Periods = {}".format(
                self.num_periods)
    except TypeError:
        # A non-comparable value is reported but does not abort here.
        print('\n{}\n'.format(self.num_periods))
    assert np.all(self.unit_price >= 0), \
        "The sales prices cannot be negative."
    assert np.all(self.unit_cost >= 0), \
        "The procurement costs cannot be negative."
    assert np.all(self.demand_cost >= 0), \
        "The unfulfilled demand costs cannot be negative."
    assert np.all(self.holding_cost >= 0), \
        "The inventory holding costs cannot be negative."
    assert np.all(self.supply_capacity > 0), \
        "The supply capacities must be positive."
    assert np.all(self.lead_time >= 0), \
        "The lead times cannot be negative."
    assert (self.backlog == False) | (self.backlog == True), \
        "The backlog parameter must be a boolean."
    assert m >= 2, "The minimum number of stages is 2. Please try again"
    assert len(self.unit_cost) == m, \
        "The length of r is not equal to the number of stages."
    assert len(self.demand_cost) == m, \
        "The length of k is not equal to the number of stages."
    # holding_cost already has the trailing 0 appended, hence == m.
    assert len(self.holding_cost) == m, \
        "The length of h is not equal to the number of stages - 1."
    assert len(self.supply_capacity) == m-1, \
        "The length of c is not equal to the number of stages - 1."
    assert len(self.lead_time) == m-1, \
        "The length of L is not equal to the number of stages - 1."
    assert self.dist in [1, 2, 3, 4, 5], \
        "dist must be one of 1, 2, 3, 4, 5."
    if self.dist < 5:
        # A valid parameterization yields a finite CDF value; the value
        # may legitimately be 0 (geom, or randint with low > 0), so test
        # finiteness rather than truthiness. Invalid parameter values
        # yield NaN and are rejected.
        assert np.isfinite(
            distributions[self.dist].cdf(0, **self.dist_param)
        ), "Wrong parameters given for distribution."
    else:
        assert len(self.user_D) == self.num_periods, \
            "The length of the user specified distribution is not equal to the number of periods."
    assert (self.alpha > 0) & (self.alpha <= 1), \
        "alpha must be in the range (0,1]."

    # select distribution
    self.demand_dist = distributions[self.dist]

    # set random generation seed (unless using user demands)
    if self.dist < 5:
        self.seed(self.seed_int)

    # initialize
    self.reset()

    # action space (reorder quantities for each stage; list)
    # An action is defined for every stage (except last one)
    self.pipeline_length = (m-1)*(lt_max+1)
    self.action_space = gym.spaces.Box(
        low=np.zeros(m-1),
        high=self.supply_capacity,
        dtype=np.int16)

    # observation space (Inventory position at each echelon, which is
    # any integer value)
    obs_scale = self.supply_capacity.max()*self.num_periods
    self.observation_space = gym.spaces.Box(
        low=-np.ones(self.pipeline_length)*obs_scale*10,
        high=np.ones(self.pipeline_length)*obs_scale,
        dtype=np.int32)