def agent_ids(self):
    """Return the ids of all agents: every producer first, then every consumer.

    One producer id and one consumer id is derived from each key of
    ``self.world.facilities``.
    """
    producer_ids = [
        Utils.agentid_producer(f_id) for f_id in self.world.facilities.keys()
    ]
    consumer_ids = [
        Utils.agentid_consumer(f_id) for f_id in self.world.facilities.keys()
    ]
    return producer_ids + consumer_ids
def _retailer_profit(self, env, step_outcome):
    """Turn the per-facility step balances into per-agent rewards.

    Producer agents are rewarded with their facility's own step balance.
    Consumer agents are rewarded with a blend of the parent facility's
    balance (weight ``wc``) and their own balance (weight ``1 - wc``).
    All rewards are divided by 1,000,000.

    Returns:
        dict mapping agent id -> scaled reward (producers inserted first,
        then consumers).
    """
    # Terminal (retailer) revenue.
    reward_scale = 1000000.0
    wc = self.env_config['global_reward_weight_consumer']
    balance_sheets = step_outcome.facility_step_balance_sheets

    # When computing the reward of an SKU, the reward of the store it
    # belongs to is also taken into account (with weight wc). Facilities that
    # are not ProductUnits act as their own parent.
    parent_balance = {}
    for unit in env.world.facilities.values():
        owner_id = unit.facility.id if isinstance(unit, ProductUnit) else unit.id
        parent_balance[unit.id] = balance_sheets[owner_id]

    rewards_by_agent = {
        Utils.agentid_producer(f_id): balance / reward_scale
        for f_id, balance in balance_sheets.items()
    }
    for f_id, balance in balance_sheets.items():
        blended = wc * parent_balance[f_id] + (1 - wc) * balance
        rewards_by_agent[Utils.agentid_consumer(f_id)] = blended / reward_scale
    return rewards_by_agent
def _actions_to_control(self, facility, actions):
    """Translate the agent actions of one facility into a ``FacilityCell.Control``.

    Args:
        facility: the facility (cell or SKU unit) the actions belong to.
        actions: iterable of ``(agent_id, action)`` pairs; a consumer
            ``action`` is used as an index into the consumer action space.

    Returns:
        A control object. For a ``FacilityCell`` it is returned with every
        field left at zero.
    """
    control = FacilityCell.Control(
        unit_price=0,
        production_rate=0,
        consumer_product_id=0,
        consumer_source_id=0,
        consumer_quantity=0,
        consumer_vlt=0,
    )
    quantity_levels = Utils.get_consumer_action_space()

    # Cells themselves are not controlled; only their SKU units are.
    if isinstance(facility, FacilityCell):
        return control

    for agent_id, action in actions:
        if Utils.is_producer_agent(agent_id) and isinstance(facility, SKUSupplierUnit):
            control.production_rate = facility.sku_info['production_rate']

        if not Utils.is_consumer_agent(agent_id):
            continue

        control.consumer_product_id = facility.bom.output_product_id
        sources = facility.consumer.sources
        if sources is not None:
            # Always order from the first source; its lead time is used.
            control.consumer_vlt = sources[0].sku_info['vlt']
            control.consumer_source_id = 0
        # The chosen level is a multiplier on the facility's mean sales.
        control.consumer_quantity = int(
            quantity_levels[action] * facility.get_sale_mean())
    return control
def world_to_state(self, world):
    """Build the state of every agent in ``world``.

    The producer and the consumer agent of a facility share the very same
    state dict object.

    Returns:
        A pair ``(serialized_state, raw_state)`` where ``raw_state`` maps
        agent id -> state dict and ``serialized_state`` is the result of
        ``self._serialize_state(raw_state)``.
    """
    per_agent = {}
    for fid, unit in world.facilities.items():
        features = self._state(unit)
        self._add_global_features(features, world)
        for make_agent_id in (Utils.agentid_producer, Utils.agentid_consumer):
            per_agent[make_agent_id(fid)] = features
    return self._serialize_state(per_agent), per_agent
def load_policy(agent_id):
    """Instantiate the policy used by ``agent_id``.

    Producers get the baseline producer policy, consumers of
    ``SKUStoreUnit`` facilities get the base-stock policy (configured with
    the enclosing ``is_static`` flag), all other consumers get the baseline
    consumer policy.
    """
    unit = env.world.facilities[Utils.agentid_to_fid(agent_id)]
    if Utils.is_producer_agent(agent_id):
        policy_cls = ProducerBaselinePolicy
        action_space = env.action_space_producer
        extra_args = ()
    elif isinstance(unit, SKUStoreUnit):
        policy_cls = ConsumerBaseStockPolicy
        action_space = env.action_space_consumer
        extra_args = (is_static,)
    else:
        policy_cls = ConsumerBaselinePolicy
        action_space = env.action_space_consumer
        extra_args = ()
    return policy_cls(env.observation_space,
                      action_space,
                      BaselinePolicy.get_config_from_env(env),
                      *extra_args)
def load_policy(agent_id):
    """Instantiate the policy used by ``agent_id``.

    Producers get the baseline producer policy and consumers the min-max
    policy.

    Raises:
        Exception: if the agent is neither a producer nor a consumer.
    """
    if Utils.is_producer_agent(agent_id):
        policy_cls = ProducerBaselinePolicy
        action_space = env.action_space_producer
    elif Utils.is_consumer_agent(agent_id):
        policy_cls = ConsumerMinMaxPolicy
        action_space = env.action_space_consumer
    else:
        raise Exception(f'Unknown agent type {agent_id}')
    return policy_cls(env.observation_space,
                      action_space,
                      BaselinePolicy.get_config_from_env(env))
def __init__(self, facility, config, economy):
    """Store the collaborators and start with all-zero sale histories.

    The history buffers are sized by ``sale_hist_len`` and the prediction
    buffer by ``demand_prediction_len`` from the environment config.
    """
    sale_hist_len = Utils.get_env_config()['sale_hist_len']
    prediction_len = Utils.get_env_config()['demand_prediction_len']

    self.facility, self.economy, self.config = facility, economy, config
    self.total_backlog_demand = 0
    self.sale_hist = [0] * sale_hist_len
    self.backlog_demand_hist = [0] * sale_hist_len
    self.pred_sale = [0] * prediction_len
def load_policy(agent_id):
    """Fetch from ``ppo_trainer`` the policy assigned to ``agent_id``.

    Producers use the baseline producer policy. Consumers on the last
    echelon use the store PPO policy; consumers within the last
    ``echelon_to_train`` echelons use the warehouse PPO policy; every other
    consumer uses the baseline consumer policy.
    """
    agent_echelon = env.world.agent_echelon[Utils.agentid_to_fid(agent_id)]
    if Utils.is_producer_agent(agent_id):
        policy_name = 'baseline_producer'
    elif agent_echelon == total_echelon - 1:
        policy_name = 'ppo_store_consumer'
    elif agent_echelon >= total_echelon - echelon_to_train:
        policy_name = 'ppo_warehouse_consumer'
    else:
        policy_name = 'baseline_consumer'
    return ppo_trainer.get_policy(policy_name)
def echelon_policy_map_fn(echelon, agent_id):
    """Map ``agent_id`` to a policy name given the echelon being trained.

    Args:
        echelon: lowest warehouse echelon that is being (or has been) trained.
        agent_id: id of the agent to map.

    Returns:
        The name of the policy the agent should use.
    """
    facility_id = Utils.agentid_to_fid(agent_id)
    if Utils.is_producer_agent(agent_id):
        return 'baseline_producer'

    agent_echelon = env.world.agent_echelon[facility_id]
    if agent_echelon == 0:
        # supplier
        return 'baseline_consumer'
    if agent_echelon == env.world.total_echelon - 1:
        # retailer
        return 'ppo_store_consumer'
    if agent_echelon >= echelon:
        # warehouse whose layer is being trained or has already been trained
        return 'ppo_warehouse_consumer'
    # warehouse on a layer that has not been trained yet
    return 'baseline_consumer'
def _add_facility_features(self, state, _facility):
    """Write the facility-level features of ``_facility`` into ``state``.

    Adds the keys ``facility_type``, ``facility``, ``facility_info``,
    ``sku_info``, ``is_positive_balance``, ``facility_id`` and
    ``echelon_level`` (in that order).
    """
    # One-hot encoding of the facility type.
    type_one_hot = [0] * len(self.env.facility_types)
    type_one_hot[self.env.facility_types[_facility.__class__.__name__]] = 1
    state['facility_type'] = type_one_hot
    state['facility'] = _facility

    # A cell describes itself; any other unit defers to the cell it sits in.
    is_cell = isinstance(_facility, FacilityCell)
    owner = _facility if is_cell else _facility.facility
    state['facility_info'] = owner.facility_info
    state['sku_info'] = {} if is_cell else _facility.sku_info
    state['is_positive_balance'] = (
        1 if owner.economy.total_balance.total() > 0 else 0)

    # One-hot encoding of the facility id (id_num is 1-based here).
    id_one_hot = [0] * len(self.env.world.facilities)
    id_one_hot[_facility.id_num - 1] = 1
    state['facility_id'] = id_one_hot

    # Echelon feature: 0 is reserved for suppliers, warehouses are shifted
    # by one, everything else gets the configured total number of echelons.
    cell = _facility.facility if isinstance(_facility, ProductUnit) else _facility
    if isinstance(cell, WarehouseCell):
        state['echelon_level'] = cell.echelon_level + 1
    elif isinstance(cell, SupplierCell):
        state['echelon_level'] = 0
    else:
        state['echelon_level'] = Utils.get_env_config()['total_echelons']
def render(self, file_name):
    """Plot the balances of the store consumer agents and save the figure.

    The top panel shows the cumulative sum over time of the total step
    balance of all selected agents; the bottom panel shows, per agent, the
    cumulative sum over time of its own step balance. Only the first
    episode (index 0 of ``self.step_balances``) is plotted.

    Args:
        file_name: path the figure is written to.
    """
    fig, axs = plt.subplots(2, 1, figsize=(25, 10))
    try:
        x = np.linspace(0, self.episod_len, self.episod_len)

        store_prefixes = ('SKUStoreUnit', 'OuterSKUStoreUnit')
        selected = [
            (i, name) for i, name in enumerate(self.facility_names)
            if name.startswith(store_prefixes) and Utils.is_consumer_agent(name)
        ]
        agent_names = [name for _, name in selected]
        columns = [i for i, _ in selected]
        # (time, agent) slice of the first episode, restricted to the stores.
        per_agent = self.step_balances[0][:, columns]

        axs[0].set_title('Cumulative Sum of Balance')
        axs[0].plot(x, np.cumsum(per_agent.sum(axis=1)))

        axs[1].set_title('Reward Breakdown by Agent (One Episod)')
        if agent_names:
            # Accumulate along the time axis so each line is one agent's
            # running balance (accumulating across agents would mix them).
            axs[1].plot(x, np.cumsum(per_agent, axis=0))
            axs[1].legend(agent_names, loc='upper left')

        fig.savefig(file_name)
    finally:
        # Release the figure; otherwise repeated renders keep them all open.
        plt.close(fig)
def load_policy(agent_id):
    """Instantiate the policy used by ``agent_id``.

    Producers get the baseline producer policy, consumers whose id starts
    with ``SKUStoreUnit`` or ``OuterSKUStoreUnit`` get the EOQ policy, and
    all remaining consumers get the baseline consumer policy.
    """
    if Utils.is_producer_agent(agent_id):
        policy_cls = ProducerBaselinePolicy
        action_space = env.action_space_producer
    elif agent_id.startswith(('SKUStoreUnit', 'OuterSKUStoreUnit')):
        policy_cls = ConsumerEOQPolicy
        action_space = env.action_space_consumer
    else:
        policy_cls = ConsumerBaselinePolicy
        action_space = env.action_space_consumer
    return policy_cls(env.observation_space,
                      action_space,
                      BaselinePolicy.get_config_from_env(env))
def policy_map_fn(agent_id):
    """Return the name of the policy that controls ``agent_id``.

    Only consumers whose id starts with ``SKUStoreUnit`` or
    ``OuterSKUStoreUnit`` are mapped to the DQN policy; everything else
    uses a baseline policy.
    """
    if Utils.is_producer_agent(agent_id):
        return 'baseline_producer'
    if agent_id.startswith(('SKUStoreUnit', 'OuterSKUStoreUnit')):
        return 'dqn_store_consumer'
    return 'baseline_consumer'
def reset(self):
    """Clear open orders, counters and the consumption history.

    The history is refilled with zeros, one slot per
    ``consumption_hist_len`` step of the environment config.
    """
    history_len = Utils.get_env_config()['consumption_hist_len']

    self.open_orders = {}
    self.products_received = 0
    self.lost_product_value = 0
    self.latest_consumptions = [0] * history_len

    self.economy.total_units_received = 0
    self.economy.total_units_purchased = 0
def load_base_policy(agent_id):
    """Instantiate the baseline policy (producer or consumer) for ``agent_id``."""
    is_producer = Utils.is_producer_agent(agent_id)
    policy_cls = ProducerBaselinePolicy if is_producer else ConsumerBaselinePolicy
    action_space = (env.action_space_producer
                    if is_producer else env.action_space_consumer)
    return policy_cls(env.observation_space,
                      action_space,
                      BaselinePolicy.get_config_from_env(env))
def __init__(self, env_config):
    """Create the world and derive the action and observation spaces.

    Args:
        env_config: environment configuration mapping; the keys read here
            are ``training``, ``init`` and ``episod_duration``.
    """
    self.env_config = env_config
    if self.env_config['training'] and env_config['init'] == 'rnd':
        self.copy_world = None
        self.world_idx = rnd.randint(1, env_config['episod_duration'])

    self.world = WorldBuilder.create(80, 16)
    self.current_iteration = 0
    self.n_iterations = 0
    self.policies = None
    self.product_ids = self._product_ids()

    # Track the largest number of upstream sources of any SKU and the
    # largest fleet size of any facility.
    self.max_sources_per_facility = 0
    self.max_fleet_size = 0
    # Facility class name -> dense integer id, in order of first appearance.
    self.facility_types = {}
    for unit in self.world.facilities.values():
        if isinstance(unit, FacilityCell):
            for sku in unit.sku_in_stock:
                consumer = sku.consumer
                if consumer is None or consumer.sources is None:
                    continue
                self.max_sources_per_facility = max(
                    self.max_sources_per_facility, len(consumer.sources))
            if unit.distribution is not None:
                self.max_fleet_size = max(
                    self.max_fleet_size, len(unit.distribution.fleet))
        self.facility_types.setdefault(
            unit.__class__.__name__, len(self.facility_types))

    self.state_calculator = StateCalculator(self)
    self.reward_calculator = RewardCalculator(env_config)
    self.action_calculator = ActionCalculator(self)

    self.action_space_producer = MultiDiscrete([
        1,  # unit price
        1,  # production rate level
    ])
    # A consumer picks one order-quantity level (the source id is not part
    # of the action).
    self.action_space_consumer = Discrete(
        len(Utils.get_consumer_action_space()))

    # Size of the state space: every facility has one complete state vector,
    # so the first one is representative.
    example_state, _ = self.state_calculator.world_to_state(self.world)
    first_state = list(example_state.values())[0]
    state_dim = len(first_state)
    self.observation_space = Box(low=-300.00,
                                 high=300.00,
                                 shape=(state_dim, ),
                                 dtype=np.float64)
def __init__(self, facility, sources, economy):
    """Store the collaborators and start with no orders and no history.

    ``latest_consumptions`` is a zero-filled list with
    ``consumption_hist_len`` entries (from the environment config).
    """
    history_len = Utils.get_env_config()['consumption_hist_len']

    self.facility, self.sources, self.economy = facility, sources, economy
    self.open_orders = {}
    self.products_received = 0
    self.lost_product_value = 0
    self.latest_consumptions = [0] * history_len
def action_dictionary_to_control(self, action_dict, world):
    """Convert an ``agent id -> action`` dict into a ``World.Control``.

    Actions are grouped by the facility their agent belongs to, and each
    group is turned into one facility control.
    """
    grouped = {}
    for agent_id, action in action_dict.items():
        facility_id = Utils.agentid_to_fid(agent_id)
        grouped.setdefault(facility_id, []).append((agent_id, action))

    controls = {
        facility_id: self._actions_to_control(world.facilities[facility_id],
                                              agent_actions)
        for facility_id, agent_actions in grouped.items()
    }
    return World.Control(facility_controls=controls)
def get_retailer_profit(self):
    """Sum the first-episode step balances of all ``RetailerCell`` consumer agents."""
    retailer_columns = [
        idx for idx, name in enumerate(self.facility_names)
        if name.startswith('RetailerCell') and Utils.is_consumer_agent(name)
    ]
    return np.sum(
        [self.step_balances[0, :, idx] for idx in retailer_columns])
def reset(self):
    """Prepare the world for a new episode and return the initial state.

    When training with ``init == 'rnd'`` and a stored ``copy_world`` exists,
    that world is reused; when training with ``init == 'rst'`` the current
    world is reset in place; in every other case a new world is built.
    """
    config = self.env_config
    # 'init' is only consulted while training.
    init_mode = config['init'] if config['training'] else None

    if init_mode == 'rnd' and self.copy_world:
        self.world = self.copy_world
    elif init_mode == 'rst':
        self.world.reset()
    else:
        self.world = WorldBuilder.create(80, 16)

    state, _ = self.state_calculator.world_to_state(self.world)
    if Utils.get_demand_sampler() == 'ONLINE':
        self.set_retailer_step(0)
    return state
def _read_df(self):
    """Load this store's sales data, generating and caching it on first use.

    If ``data/GammaRetail/store<N>.csv`` exists it is read back (parsing
    ``self.dt_col`` as dates). Otherwise one row per SKU and day is
    generated for ``self.total_span`` days starting at ``self.start_dt``,
    with the demand drawn from a gamma distribution whose shape is the
    SKU's ``sale_gamma``; the result is written to that file.

    Returns:
        pandas.DataFrame with the id, date, sale and sale-price columns.
    """
    os.makedirs('data/GammaRetail/', exist_ok=True)
    file_name = f"data/GammaRetail/store{self.store_idx+1}.csv"
    if os.path.exists(file_name):
        return pd.read_csv(file_name, parse_dates=[self.dt_col])

    rows = []
    for sku_info in Utils.get_sku_of_store(self.store_idx):
        sale_gamma = sku_info['sale_gamma']
        sku_name = sku_info['sku_name']
        sku_price = sku_info['price']
        for day in range(self.total_span):
            demand = int(np.random.gamma(sale_gamma))
            rows.append(
                [sku_name, self.start_dt + timedelta(day), demand, sku_price])

    df = pd.DataFrame(rows,
                      columns=[
                          self.id_col, self.dt_col, self.sale_col,
                          self.sale_price_col
                      ])
    # Do not persist the row index: otherwise the cached file comes back with
    # an extra unnamed column that the freshly generated frame does not have.
    df.to_csv(file_name, index=False)
    return df
def cache_data(self):
    """Load the sales data and cache one dense daily series per SKU.

    For every SKU id returned by ``Utils.get_all_skus()`` the per-day sale,
    sale price and date are stored in ``self.sale_ts_cache``,
    ``self.sale_price_ts_cache`` and ``self.date_cache``, covering every day
    between the earliest and latest date of the whole data set. Days with no
    record get a sale of 0, the SKU's mean sale price and the calendar date
    itself. ``self.sale_mean`` holds the SKU's mean sale.
    """
    self.df = self._read_df()
    self._transfer_to_daily_sale()
    id_list = Utils.get_all_skus()
    dt_min, dt_max = self.df[self.dt_col].min(), self.df[self.dt_col].max()
    self.total_span = (dt_max - dt_min).days + 1
    dt_str_col = f"{self.dt_col}_str"

    for id_val in id_list:
        # Work on a copy so that adding a column never writes into a view of
        # self.df.
        df_tmp = self.df[self.df[self.id_col] == id_val].copy()
        df_tmp[dt_str_col] = df_tmp[self.dt_col].map(
            lambda x: x.strftime(self.dt_format))

        # One lookup table per column, keyed by the formatted date.
        by_day = df_tmp.set_index(dt_str_col).to_dict('dict')
        sale_cache_tmp = by_day[self.sale_col]
        sale_price_cache_tmp = by_day[self.sale_price_col]
        date_cache_tmp = by_day[self.dt_col]

        self.sale_ts_cache[id_val] = []
        self.sale_price_ts_cache[id_val] = []
        self.date_cache[id_val] = []
        self.sale_mean[id_val] = df_tmp[self.sale_col].mean()
        sale_price_mean = df_tmp[self.sale_price_col].mean()

        dt_tmp = dt_min
        while dt_tmp <= dt_max:
            dt_tmp_str = datetime.strftime(dt_tmp, self.dt_format)
            if sale_cache_tmp.get(dt_tmp_str) is None:
                print(f"this day is lose in dataset: {dt_tmp_str}")
            self.sale_ts_cache[id_val].append(
                sale_cache_tmp.get(dt_tmp_str, 0))
            self.sale_price_ts_cache[id_val].append(
                sale_price_cache_tmp.get(dt_tmp_str, sale_price_mean))
            self.date_cache[id_val].append(
                date_cache_tmp.get(dt_tmp_str, dt_tmp))
            dt_tmp = dt_tmp + timedelta(days=1)
def _find_source(self, f_state_info):
    """Compute the base-stock replenishment action for one facility state.

    Args:
        f_state_info: state dict of the facility; the keys read are
            ``facility``, ``bom_inputs``, ``storage_levels``,
            ``consumer_in_transit_orders``, ``storage_capacity`` and
            ``sale_mean``.

    Returns:
        The consumer quantity action produced by
        ``Utils.get_consumer_quantity_action``, or 0 when no order is placed
        (unsupported facility type, nothing to replenish, or the base stock
        level is already covered).
    """
    policy_cls = ConsumerBaseStockPolicy

    # Every call advances this instance's counter. For non-static policies
    # the class-level counter is advanced whenever it differs from the
    # instance counter, and the base stocks are refreshed every
    # `update_interval` class-level steps.
    self.step += 1
    step = policy_cls.step
    update_interval = policy_cls.update_interval
    if (not self.static) and step != self.step:
        policy_cls.step += 1
        step = policy_cls.step
        if step % update_interval == 0:
            self.update_base_stocks()

    facility = f_state_info['facility']
    if type(facility) not in [SKUWarehouseUnit, SKUStoreUnit, OuterSKUStoreUnit]:
        return 0

    # Inventory position = on-hand stock + orders still in transit.
    inputs = f_state_info['bom_inputs']
    available_inventory = np.array(f_state_info['storage_levels'])
    inflight_orders = np.array(f_state_info['consumer_in_transit_orders'])
    booked_inventory = available_inventory + inflight_orders

    # Pick the required input with the lowest inventory-to-requirement ratio.
    most_needed_product_id = None
    min_ratio = float('inf')
    for product_id, quantity in enumerate(inputs):
        if quantity > 0:
            fulfillment_ratio = booked_inventory[product_id] / quantity
            if fulfillment_ratio < min_ratio:
                min_ratio = fulfillment_ratio
                most_needed_product_id = product_id

    # No input is required at all, so there is nothing to order.
    if most_needed_product_id is None:
        return 0

    shift = step % update_interval
    base = policy_cls.base_stocks[facility.id][policy_cls.start_step + shift]
    reorder = base - booked_inventory[most_needed_product_id]
    # The replenishment point has not been reached.
    if reorder <= 0:
        return 0

    # Never order beyond the allowed fraction of the storage capacity.
    total_booked = np.sum(booked_inventory)
    capacity_limit = policy_cls.stop_order_factor * f_state_info['storage_capacity']
    if reorder + total_booked > capacity_limit:
        reorder = capacity_limit - total_booked

    # Express the order as a multiple of mean sales and map it to an action.
    action = reorder / f_state_info['sale_mean']
    consumer_quantity = Utils.get_consumer_quantity_action(action)
    return consumer_quantity
def visualization(env, policies, iteration, policy_mode, basestock=False):
    """Run one evaluation rollout and write its plots under ``output/``.

    Args:
        env: environment to roll out; its config provides ``evaluation_len``,
            ``episod_duration`` and ``tail_timesteps``.
        policies: mapping agent id -> policy used to compute actions.
        iteration: iteration number, used in the output directory name.
        policy_mode: label of the evaluated policies, used in the output
            directory name.
        basestock: when true, the base-stock policy class is given the
            facilities of the freshly reset world.

    The plots are written to ``output/<policy_mode>/iter_<iteration>/``.
    """
    renderer = AsciiWorldRenderer()
    frame_seq = []

    evaluation_epoch_len = env.env_config['evaluation_len']
    starter_step = (env.env_config['episod_duration']
                    + env.env_config['tail_timesteps'])
    env.set_iteration(1, 1)
    print(
        f"Environment: Producer action space {env.action_space_producer}, "
        f"Consumer action space {env.action_space_consumer}, "
        f"Observation space {env.observation_space}",
        flush=True)

    obss = env.reset()
    if basestock:
        from scheduler.inventory_base_stock_policy import ConsumerBaseStockPolicy
        ConsumerBaseStockPolicy.facilities = env.world.facilities

    if Utils.get_demand_sampler() == 'ONLINE':
        env.set_retailer_step(starter_step)
    _, infos = env.state_calculator.world_to_state(env.world)

    rnn_states = {}
    rewards = {}
    for agent_id in obss.keys():
        rnn_states[agent_id] = policies[agent_id].get_initial_state()
        rewards[agent_id] = 0

    # Simulation loop
    tracker = SimulationTracker(evaluation_epoch_len, 1, env.agent_ids())
    print(f" === evaluation length {evaluation_epoch_len}, it will take about 1 min ....", flush=True)

    for epoch in range(evaluation_epoch_len):
        action_dict = {}
        for agent_id, obs in obss.items():
            policy = policies[agent_id]
            # NOTE(review): the returned policy state is discarded, so every
            # step is computed from the initial state — confirm this is
            # intended if recurrent policies are ever evaluated here.
            action, _new_state, _ = policy.compute_single_action(
                obs,
                state=rnn_states[agent_id],
                info=infos[agent_id],
                explore=False)
            action_dict[agent_id] = action
        obss, rewards, dones, infos = env.step(action_dict)

        step_balances = {}
        for agent_id in rewards.keys():
            facility = env.world.facilities[Utils.agentid_to_fid(agent_id)]
            step_balances[agent_id] = facility.economy.step_balance.total()
        tracker.add_sample(0, epoch,
                           env.world.economy.global_balance().total(),
                           step_balances, rewards)

        # some stats
        stock_status = env.get_stock_status()
        order_in_transit_status = env.get_order_in_transit_status()
        demand_status = env.get_demand_status()
        tracker.add_sku_status(0, epoch, stock_status,
                               order_in_transit_status, demand_status)

        frame = renderer.render(env.world)
        frame_seq.append(np.asarray(frame))

    print(tracker.get_retailer_profit())

    # Create the whole output path in one call; this is safe even if the
    # directories already exist or are created concurrently.
    output_dir = f'output/{policy_mode}/iter_{iteration}'
    os.makedirs(output_dir, exist_ok=True)

    tracker.render(f'output/{policy_mode}/iter_{iteration}/plot.png')
    tracker.render_sku(policy_mode, iteration)
    print(f" === evaluation length end ", flush=True)
def create(x=80, y=32):
    """Build a world of ``x`` by ``y`` terrain cells and populate it.

    Facilities are created in this order: suppliers (echelon 0), then every
    warehouse echelon, then the retail stores (echelon ``1 + n_echelon``).
    Each facility gets one SKU unit per configured SKU; warehouse and store
    SKUs are linked to the upstream SKUs of the previous layer whose
    facility lists them as a downstream facility.

    Returns:
        The populated ``World``.
    """
    world = World(x, y)
    world.grid = [[TerrainCell(xi, yi) for yi in range(y)] for xi in range(x)]

    def default_economy_config(order_cost=0, initial_balance=initial_balance):
        return ProductUnit.EconomyConfig(order_cost, initial_balance)

    # facility placement
    map_margin = 4
    size_y_margins = world.size_y - 2 * map_margin
    supplier_x = 10
    retailer_x = 70

    def spread_y(count, index):
        # Spread `count` facilities evenly between the margins; a single
        # facility is placed in the middle.
        if count > 1:
            return int(size_y_margins / (count - 1) * index + map_margin)
        return int(size_y_margins / 2 + map_margin)

    def select_upstream(candidates, index):
        # Keep the upstream SKUs whose facility serves facility `index`.
        return [
            s for s in candidates
            if index in s.facility.facility_info['downstream_facilities']
        ]

    # suppliers
    n_supplies = Utils.get_supplier_num()
    suppliers = []
    supplier_skus = []
    supplier_sources = dict()
    for i in range(n_supplies):
        supplier_config = SupplierCell.Config(
            max_storage_capacity=Utils.get_supplier_capacity(i),
            unit_storage_cost=Utils.get_supplier_unit_storage_cost(i),
            fleet_size=Utils.get_supplier_fleet_size(i),
            unit_transport_cost=Utils.get_supplier_unit_transport_cost(i))
        supplier_y = spread_y(n_supplies, i)
        f = SupplierCell(supplier_x, supplier_y, world, supplier_config,
                         default_economy_config())
        f.idx_in_config = i
        f.facility_info = Utils.get_supplier_info(i)
        f.facility_short_name = Utils.get_supplier_short_name()
        world.agent_echelon[f.id] = 0
        world.place_cell(f)
        suppliers.append(f)

        for sku_info in Utils.get_sku_of_supplier(i):
            sku_name = sku_info['sku_name']
            bom = BillOfMaterials({}, sku_name)
            supplier_sku_config = ProductUnit.Config(
                sources=None,
                unit_manufacturing_cost=sku_info['cost'],
                sale_gamma=sku_info.get('sale_gamma', 10),
                bill_of_materials=bom)
            sku = SKUSupplierUnit(
                f, supplier_sku_config,
                default_economy_config(order_cost=f.facility_info['order_cost']))
            sku.idx_in_config = sku_name
            f.sku_in_stock.append(sku)
            sku.distribution = f.distribution
            sku.storage = f.storage
            sku.sku_info = sku_info
            f.storage.try_add_units({sku_name: sku_info['init_stock']})
            supplier_skus.append(sku)
            supplier_sources.setdefault(sku_name, []).append(sku)
            world.agent_echelon[sku.id] = 0

    # distribution
    n_echelon = Utils.get_num_warehouse_echelon()
    pre_warehouses = suppliers
    all_warehouses = []
    warehouse_skus = []
    pre_warehouse_sources = supplier_sources
    for echelon in range(n_echelon):
        echelon_gap = (retailer_x - supplier_x) / (n_echelon + 1)
        echelon_x = int(supplier_x + (echelon + 1) * echelon_gap)
        n_warehouses = Utils.get_warehouse_num(echelon)
        warehouses = []
        warehouse_sources = dict()
        for i in range(n_warehouses):
            warehouse_config = WarehouseCell.Config(
                max_storage_capacity=Utils.get_warehouse_capacity(echelon, i),
                unit_storage_cost=Utils.get_warehouse_unit_storage_cost(echelon, i),
                fleet_size=Utils.get_warehouse_fleet_size(echelon, i),
                unit_transport_cost=Utils.get_warehouse_unit_transport_cost(echelon, i))
            warehouse_y = spread_y(n_warehouses, i)
            w = WarehouseCell(echelon_x, warehouse_y, world, warehouse_config,
                              default_economy_config())
            w.idx_in_config = i
            w.echelon_level = echelon
            w.facility_info = Utils.get_warehouse_info(echelon, i)
            w.facility_short_name = Utils.get_warehouse_short_name(echelon)
            world.agent_echelon[w.id] = 1 + echelon
            world.place_cell(w)
            warehouses.append(w)
            WorldBuilder.connect_cells(world, w, *pre_warehouses)

            for sku_info in Utils.get_sku_of_warehouse(echelon, i):
                sku_name = sku_info['sku_name']
                upstream_suppliers = select_upstream(
                    pre_warehouse_sources[sku_name], i)
                bom = BillOfMaterials({sku_name: 1}, sku_name)
                warehouse_sku_config = ProductUnit.Config(
                    sources=upstream_suppliers,
                    unit_manufacturing_cost=sku_info.get('cost', 10),
                    sale_gamma=sku_info.get('sale_gamma', 10),
                    bill_of_materials=bom)
                sku = SKUWarehouseUnit(
                    w, warehouse_sku_config,
                    default_economy_config(order_cost=w.facility_info['order_cost']))
                sku.idx_in_config = sku_name
                w.sku_in_stock.append(sku)
                sku.distribution = w.distribution
                sku.storage = w.storage
                sku.sku_info = sku_info
                warehouse_skus.append(sku)
                w.storage.try_add_units({sku_name: sku_info.get('init_stock', 0)})
                warehouse_sources.setdefault(sku_name, []).append(sku)
                world.agent_echelon[sku.id] = 1 + echelon

                # register this SKU as a downstream SKU of each of its sources
                for s_sku in upstream_suppliers:
                    s_sku.downstream_skus.append(sku)
        all_warehouses.extend(warehouses)
        pre_warehouse_sources = warehouse_sources
        pre_warehouses = warehouses

    # final consumers
    n_stores = Utils.get_store_num()
    stores = []
    store_skus = []
    for i in range(n_stores):
        store_config = RetailerCell.Config(
            max_storage_capacity=Utils.get_store_capacity(i),
            unit_storage_cost=Utils.get_store_unit_storage_cost(i),
            fleet_size=1000,
            unit_transport_cost=10)
        retailer_y = spread_y(n_stores, i)
        r = RetailerCell(retailer_x, retailer_y, world, store_config,
                         default_economy_config())
        r.idx_in_config = i
        r.facility_info = Utils.get_store_info(i)
        r.facility_short_name = Utils.get_store_short_name()
        world.agent_echelon[r.id] = 1 + n_echelon
        world.place_cell(r)
        stores.append(r)
        WorldBuilder.connect_cells(world, r, *pre_warehouses)

        for sku_info in Utils.get_sku_of_store(i):
            sku_name = sku_info['sku_name']
            upstream_warehouses = select_upstream(
                pre_warehouse_sources[sku_name], i)
            bom = BillOfMaterials({sku_name: 1}, sku_name)
            retail_sku_config = ProductUnit.Config(
                sources=upstream_warehouses,
                unit_manufacturing_cost=sku_info.get('cost', 10),
                sale_gamma=sku_info.get('sale_gamma', 10),
                bill_of_materials=bom)

            # The configured demand sampler decides the store unit type.
            if Utils.get_demand_sampler() == "DYNAMIC_GAMMA":
                sku = SKUStoreUnit(
                    r, retail_sku_config,
                    default_economy_config(order_cost=r.facility_info['order_cost']))
            elif Utils.get_demand_sampler() == "GAMMA":
                sale_sampler = gamma_sale_sampler(i)
                sku = OuterSKUStoreUnit(
                    r, retail_sku_config,
                    default_economy_config(order_cost=r.facility_info['order_cost']),
                    sale_sampler)
            else:
                sale_sampler = online_sale_sampler(
                    f"data/OnlineRetail/store{i+1}_new.csv")
                sku = OuterSKUStoreUnit(
                    r, retail_sku_config,
                    default_economy_config(order_cost=r.facility_info['order_cost']),
                    sale_sampler)

            sku.idx_in_config = sku_name
            r.sku_in_stock.append(sku)
            sku.storage = r.storage
            sku.sku_info = sku_info
            r.storage.try_add_units({sku_name: sku_info.get('init_stock', 0)})
            store_skus.append(sku)
            world.agent_echelon[sku.id] = 1 + n_echelon

            # register this SKU as a downstream SKU of each of its sources
            for w_sku in upstream_warehouses:
                w_sku.downstream_skus.append(sku)

    # Register cells first, then SKU units, tracking the highest SKU price.
    for facility in suppliers + all_warehouses + stores:
        world.facilities[facility.id] = facility
    for sku in supplier_skus + warehouse_skus + store_skus:
        world.facilities[sku.id] = sku
        if sku.sku_info.get('price', 0) > world.max_price:
            world.max_price = sku.sku_info.get('price', 0)

    world.total_echelon = Utils.get_total_echelon()
    return world
def _init_sale_pred(self):
    """Return the future sales over the configured ``sale_oracle_len`` horizon."""
    return self.get_future_sales(Utils.get_env_config()['sale_oracle_len'])
def eval(self, iter, eval_on_trainingset=False):
    """Roll out one evaluation episode with exploration disabled.

    The rollout stops at the first step where any agent reports ``done``
    (the environment is reset once more at that point) or after 100000
    steps, whichever comes first.

    Args:
        iter: iteration number; only used in the printed banner.
        eval_on_trainingset: forwarded to the initial ``env.reset`` call.

    Returns:
        dict with the keys ``rewards_all`` (per-agent list of step rewards),
        ``episode_reward_all`` (per-agent list of episode returns),
        ``epsilon`` (of the first trained policy), ``all_step``,
        ``episode_step`` (mean length of the finished episodes, or the
        number of steps taken if none finished) and ``profit``.
    """
    self.switch_mode(eval=True)
    print(f" == eval iteration {iter} == ")

    obss = self.env.reset(eval=True, eval_on_trainingset=eval_on_trainingset)
    _, infos = self.env.state_calculator.world_to_state(self.env.world)

    rnn_states = {}
    rewards_all = {}
    episode_reward_all = {}
    episode_reward = {}
    episode_steps = []
    episode_step = 0

    tracker = SimulationTracker(self.env.done_step, 1, self.env.agent_ids())

    for agent_id in obss.keys():
        rnn_states[agent_id] = self.policies[agent_id].get_initial_state()
        rewards_all[agent_id] = []
        episode_reward_all[agent_id] = []
        episode_reward[agent_id] = 0

    for _ in range(100000):
        episode_step += 1
        actions = {}
        for agent_id, obs in obss.items():
            policy = self.policies[agent_id]
            action, _new_state, _ = policy.compute_single_action(
                obs,
                state=rnn_states[agent_id],
                info=infos[agent_id],
                explore=False)
            actions[agent_id] = action
        next_obss, rewards, dones, infos = self.env.step(actions)

        for agent_id, reward in rewards.items():
            rewards_all[agent_id].append(reward)
            episode_reward[agent_id] += reward

        step_balances = {}
        for agent_id in rewards.keys():
            facility = self.env.world.facilities[Utils.agentid_to_fid(agent_id)]
            step_balances[agent_id] = facility.economy.step_balance.total()
        tracker.add_sample(0, episode_step - 1,
                           self.env.world.economy.global_balance().total(),
                           step_balances, rewards)

        if any(dones.values()):
            obss = self.env.reset(eval=True)
            episode_steps.append(episode_step)
            episode_step = 0
            for agent_id, reward in episode_reward.items():
                episode_reward_all[agent_id].append(reward)
                episode_reward[agent_id] = 0
            break
        obss = next_obss

    # If the step cap was hit before any episode finished there is nothing to
    # average; report the number of steps taken instead of dividing by zero.
    if episode_steps:
        mean_episode_len = sum(episode_steps) / len(episode_steps)
    else:
        mean_episode_len = episode_step

    result = {
        "rewards_all": rewards_all,
        "episode_reward_all": episode_reward_all,
        "epsilon": self.policies[self.policies_to_train[0]].epsilon,
        "all_step": self.step,
        "episode_step": mean_episode_len,
        "profit": tracker.get_retailer_profit(),
    }
    return result
def _product_ids(self):
    """Return the SKU collection reported by ``Utils.get_all_skus()``."""
    all_skus = Utils.get_all_skus()
    return all_skus
def get_future_demand(self, product_id):
    """Return the market demand of ``product_id`` ``sale_hist_len`` steps ahead.

    The look-ahead step wraps around the sampler's ``total_span``.
    """
    look_ahead = Utils.get_env_config()['sale_hist_len']
    future_step = (self.step + look_ahead) % self.sale_sampler.total_span
    demand, _ = self.economy.market_demand(self.sale_sampler, product_id,
                                           future_step)
    return demand
# initializing for base stock policies for epoch in tqdm(range(args.episod)): action_dict = {} for agent_id, obs in obss.items(): policy = base_policies[agent_id] action, _, _ = policy.compute_single_action( obs, state=rnn_states[agent_id], info=infos[agent_id], explore=True) action_dict[agent_id] = action obss, rewards, _, infos = env.step(action_dict) sku_base_stocks = {} time_hrz_len = env_config_for_rendering['sale_hist_len'] for sku_name in Utils.get_all_skus(): supplier_skus = [] for facility in env.world.facilities.values(): if isinstance( facility, ProductUnit) and facility.sku_info['sku_name'] == sku_name: supplier_skus.append(facility) _sku_base_stocks = ConsumerBaseStockPolicy.get_base_stock( supplier_skus, time_hrz_len) sku_base_stocks.update(_sku_base_stocks) def load_policy(agent_id): _facility = env.world.facilities[Utils.agentid_to_fid(agent_id)] if Utils.is_producer_agent(agent_id): return ProducerBaselinePolicy( env.observation_space, env.action_space_producer,