def _actions_to_control(self, facility, actions):
    """Build the ``FacilityCell.Control`` for one facility from agent actions.

    Args:
        facility: The unit the control applies to. A ``FacilityCell``
            instance short-circuits to the all-zero control.
        actions: Iterable of ``(agent_id, action)`` pairs. For consumer
            agents ``action`` is used as an index into
            ``Utils.get_consumer_action_space()``.

    Returns:
        The populated ``FacilityCell.Control`` object.
    """
    control = FacilityCell.Control(
        unit_price=0,
        production_rate=0,
        consumer_product_id=0,
        consumer_source_id=0,
        consumer_quantity=0,
        consumer_vlt=0,
    )
    quantity_levels = Utils.get_consumer_action_space()

    # Facility cells themselves are not controlled: hand back the zeroed
    # control untouched.
    if isinstance(facility, FacilityCell):
        return control

    for agent_id, action in actions:
        # Producer side: the rate is taken from the SKU configuration, not
        # from the action, and only for supplier SKU units.
        if Utils.is_producer_agent(agent_id) and isinstance(facility, SKUSupplierUnit):
            control.production_rate = facility.sku_info['production_rate']

        if not Utils.is_consumer_agent(agent_id):
            continue

        # Consumer side: order the facility's own output product.
        control.consumer_product_id = facility.bom.output_product_id

        sources = facility.consumer.sources
        if sources is not None:
            # Lead time comes from the first source; the source id is pinned
            # to 0 (an earlier revision appears to have read it from the
            # action instead).
            control.consumer_vlt = sources[0].sku_info['vlt']
            control.consumer_source_id = 0

        # The chosen level scales the facility's mean sales into an integer
        # order quantity.
        # NOTE(review): nesting reconstructed from unindented source -- confirm
        # this assignment is meant to run even when ``sources`` is None.
        control.consumer_quantity = int(
            quantity_levels[action] * facility.get_sale_mean()
        )

    return control
def __init__(self, env_config):
    """Create the world and derive the action and observation spaces.

    Args:
        env_config: Mapping of environment settings. This constructor reads
            the keys ``'training'``, ``'init'`` and, when training with
            ``init == 'rnd'``, ``'episod_duration'`` (spelled as in the
            config). The mapping is also passed on to ``RewardCalculator``.
    """
    self.env_config = env_config

    # Random-initialisation bookkeeping exists only in training mode with
    # the 'rnd' init strategy.
    if self.env_config['training'] and env_config['init'] == 'rnd':
        self.copy_world = None
        self.world_idx = rnd.randint(1, env_config['episod_duration'])

    self.world = WorldBuilder.create(80, 16)
    self.current_iteration = 0
    self.n_iterations = 0
    self.policies = None
    self.product_ids = self._product_ids()

    # Track the largest number of upstream sources and the largest fleet
    # size found across the world's facilities.
    self.max_sources_per_facility = 0
    self.max_fleet_size = 0
    self.facility_types = {}

    # NOTE(review): loop nesting reconstructed from unindented source --
    # confirm that class-name registration applies to every entry and not
    # only to FacilityCell instances.
    for entry in self.world.facilities.values():
        if isinstance(entry, FacilityCell):
            for sku in entry.sku_in_stock:
                if sku.consumer is None or sku.consumer.sources is None:
                    continue
                self.max_sources_per_facility = max(
                    self.max_sources_per_facility,
                    len(sku.consumer.sources),
                )

            if entry.distribution is not None:
                self.max_fleet_size = max(
                    self.max_fleet_size,
                    len(entry.distribution.fleet),
                )

        # Each distinct class name gets the next sequential integer id, which
        # always equals the number of names registered so far.
        self.facility_types.setdefault(
            entry.__class__.__name__, len(self.facility_types)
        )

    self.state_calculator = StateCalculator(self)
    self.reward_calculator = RewardCalculator(env_config)
    self.action_calculator = ActionCalculator(self)

    self.action_space_producer = MultiDiscrete([
        1,  # unit price
        1,  # production rate level
    ])
    # Consumers pick a single discrete quantity level.
    self.action_space_consumer = Discrete(len(Utils.get_consumer_action_space()))

    # Size the observation space from a sample state: every entry of the
    # state dict is one complete state vector, so the first one gives the
    # dimension.
    example_state, _ = self.state_calculator.world_to_state(self.world)
    sample_vectors = list(example_state.values())
    state_dim = len(sample_vectors[0])
    self.observation_space = Box(
        low=-300.00,
        high=300.00,
        shape=(state_dim,),
        dtype=np.float64,
    )