def agent_ids(self):
    """Return the ids of all agents derived from the world's facilities.

    Every facility id yields one producer agent id and one consumer agent
    id. The returned list holds all producer ids first, followed by all
    consumer ids, each group in the iteration order of
    ``self.world.facilities``.
    """
    return [
        build_agent_id(f_id)
        # Outer loop over the id builders keeps the "all producers, then
        # all consumers" ordering.
        for build_agent_id in (Utils.agentid_producer, Utils.agentid_consumer)
        for f_id in self.world.facilities.keys()
    ]
def _retailer_profit(self, env, step_outcome):
    """Compute per-agent rewards from the step balance sheets (retailer revenue).

    Producer agents are rewarded with the balance of their own facility.
    Consumer agents are rewarded with a blend of the parent facility's
    balance (weight ``wc``) and their own balance (weight ``1 - wc``), where
    ``wc`` is ``self.env_config['global_reward_weight_consumer']``. For a
    ``ProductUnit`` the parent is ``facility.facility``; every other facility
    acts as its own parent. All rewards are divided by 1,000,000.

    Args:
        env: Object exposing ``world.facilities``.
        step_outcome: Object exposing ``facility_step_balance_sheets``,
            which is indexed by facility id and iterated with ``.items()``.

    Returns:
        dict: Agent id -> scaled reward, producer entries inserted before
        consumer entries.

    Note:
        Every id in ``facility_step_balance_sheets`` is looked up in the
        parent-balance table built from ``env.world.facilities``, so the
        balance sheets are expected to cover only known facility ids.
    """
    consumer_weight = self.env_config['global_reward_weight_consumer']
    balance_sheets = step_outcome.facility_step_balance_sheets

    # When computing the reward of an SKU, the reward of the store it belongs
    # to is also taken into account (with weight ``consumer_weight``).
    parent_balance = {}
    for unit in env.world.facilities.values():
        owner = unit.facility if isinstance(unit, ProductUnit) else unit
        parent_balance[unit.id] = balance_sheets[owner.id]

    blended_by_facility = {
        f_id: consumer_weight * parent_balance[f_id]
        + (1 - consumer_weight) * own_balance
        for f_id, own_balance in balance_sheets.items()
    }

    rewards_by_agent = {
        Utils.agentid_producer(f_id): own_balance / 1000000.0
        for f_id, own_balance in balance_sheets.items()
    }
    rewards_by_agent.update(
        (Utils.agentid_consumer(f_id), blended / 1000000.0)
        for f_id, blended in blended_by_facility.items()
    )
    return rewards_by_agent
def world_to_state(self, world):
    """Build the per-agent state mapping for ``world``.

    For each facility, a state is obtained from ``self._state`` and then
    passed to ``self._add_global_features`` together with ``world``. The
    resulting object is stored under both the producer and the consumer
    agent id of that facility, so the two entries reference the same
    object rather than separate copies.

    Args:
        world: Object exposing a ``facilities`` mapping of id -> facility.

    Returns:
        tuple: ``(self._serialize_state(state), state)``, where ``state``
        is the agent id -> facility state mapping.
    """
    state_by_agent = {}
    for facility_id, facility in world.facilities.items():
        facility_state = self._state(facility)
        self._add_global_features(facility_state, world)
        # Producer entry first, then consumer, both pointing at one object.
        for build_agent_id in (Utils.agentid_producer, Utils.agentid_consumer):
            state_by_agent[build_agent_id(facility_id)] = facility_state
    serialized = self._serialize_state(state_by_agent)
    return serialized, state_by_agent