def _load_orders_from_bin(order_file_path: str) -> Dict[int, List[Order]]:
    orders: Dict[int, List[Order]] = {}

    reader = BinaryReader(order_file_path)

    for order in reader.items():
        tick = order.timestamp

        if tick not in orders:
            orders[tick] = []

        orders[tick].append(
            Order(order.timestamp, order.src_port_index, order.dest_port_index, order.quantity))

    return orders
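# A minimal usage sketch for the loader above (hypothetical helper; attribute
# names on Order are assumed to mirror its constructor arguments).
def _dump_orders(order_file_path: str) -> None:
    orders_by_tick = _load_orders_from_bin(order_file_path)
    for tick in sorted(orders_by_tick):
        for order in orders_by_tick[tick]:
            print(f"tick={tick}: {order.quantity} containers from port "
                  f"{order.src_port_index} to port {order.dest_port_index}")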
def test_convert_without_events(self):
    out_dir = tempfile.mkdtemp()

    out_bin = os.path.join(out_dir, "trips.bin")

    meta_file = os.path.join("tests", "data", "data_lib", "case_2", "meta.yml")
    csv_file = os.path.join("tests", "data", "data_lib", "trips.csv")

    bct = BinaryConverter(out_bin, meta_file)

    bct.add_csv(csv_file)

    # flush will close the file, cannot add again
    bct.flush()

    reader = BinaryReader(out_bin)

    meta: BinaryMeta = reader.meta

    self.assertIsNotNone(meta)

    # check events
    self.assertListEqual(
        ["require_bike", "return_bike", "rebalance_bike", "deliver_bike"],
        [event.display_name for event in meta.events])

    self.assertListEqual(
        ["RequireBike", "ReturnBike", "RebalanceBike", "DeliverBike"],
        [event.type_name for event in meta.events])

    self.assertEqual("RequireBike", meta.default_event_name)

    self.assertIsNone(meta.event_attr_name)
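# A small companion sketch (hypothetical helper, not part of the test suite):
# once converted, records can be replayed directly from the binary file. The
# field names below are the ones asserted by test_convert_with_events.
def _dump_first_trips(bin_path: str, limit: int = 5) -> None:
    reader = BinaryReader(bin_path)
    for index, item in enumerate(reader.items()):
        if index >= limit:
            break
        print(item.timestamp, item.durations, item.src_station, item.dest_station)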
def __init__(self, event_buffer: EventBuffer, topology: str, start_tick: int,
             max_tick: int, snapshot_resolution: int, max_snapshots: int,
             additional_options: dict = {}):
    super().__init__(scenario_name="vm_scheduling", event_buffer=event_buffer,
                     topology=topology, start_tick=start_tick, max_tick=max_tick,
                     snapshot_resolution=snapshot_resolution,
                     max_snapshots=max_snapshots,
                     additional_options=additional_options)

    # Initialize environment metrics.
    self._init_metrics()

    # Load configurations.
    self._load_configs()
    self._register_events()

    self._init_frame()

    # Initialize simulation data.
    self._init_data()

    # Data center structure for quick accessing.
    self._init_structure()

    # All living VMs.
    self._live_vms: Dict[int, VirtualMachine] = {}
    # All request payloads of the pending-decision VMs.
    # NOTE: Needs a naming suggestion.
    self._pending_vm_request_payload: Dict[int, VmRequestPayload] = {}

    self._vm_reader = BinaryReader(self._config.VM_TABLE)
    self._vm_item_picker = self._vm_reader.items_tick_picker(
        self._start_tick, self._max_tick, time_unit="s")

    self._cpu_reader = CpuReader(data_path=self._config.CPU_READINGS,
                                 start_tick=self._start_tick)

    self._tick: int = 0

    self._pending_action_vm_id: int = 0
def _load_stops_from_bin(stops_file_path: str, vessel_number: int) -> List[List[Stop]]:
    stops: List[List[Stop]] = []

    for _ in range(vessel_number):
        stops.append([])

    reader = BinaryReader(stops_file_path)

    for stop_item in reader.items():
        vessel_stops: List[Stop] = stops[stop_item.vessel_index]

        stop = Stop(len(vessel_stops), stop_item.timestamp, stop_item.leave_tick,
                    stop_item.port_index, stop_item.vessel_index)

        vessel_stops.append(stop)

    return stops
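# A usage sketch under assumed inputs (hypothetical helper; Stop attribute
# names are assumed to mirror its constructor arguments): each inner list
# holds one vessel's stops in the order they were read.
def _dump_first_stops(stops_file_path: str, vessel_number: int) -> None:
    stops = _load_stops_from_bin(stops_file_path, vessel_number)
    for vessel_index, vessel_stops in enumerate(stops):
        if vessel_stops:
            first = vessel_stops[0]
            print(f"vessel {vessel_index} first calls at port {first.port_index}")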
class CitibikeBusinessEngine(AbsBusinessEngine):
    def __init__(self, event_buffer: EventBuffer, topology: Optional[str],
                 start_tick: int, max_tick: int, snapshot_resolution: int,
                 max_snapshots: Optional[int], additional_options: dict = {}):
        super().__init__("citi_bike", event_buffer, topology, start_tick,
                         max_tick, snapshot_resolution, max_snapshots,
                         additional_options)

        # Trip binary reader.
        self._trip_reader: BinaryReader = None

        # Update self._config_path with current file path.
        self.update_config_root_path(__file__)

        # Holidays for US, as we are using NY data.
        self._us_holidays = holidays.US()

        # Our stations list used for quick accessing.
        self._stations: List[Station] = []

        self._total_trips: int = 0
        self._total_shortages: int = 0
        self._total_operate_num: int = 0

        self._init()

    @property
    def frame(self) -> FrameBase:
        """FrameBase: Current frame."""
        return self._frame

    @property
    def snapshots(self) -> SnapshotList:
        """SnapshotList: Current snapshot list."""
        return self._snapshots

    @property
    def configs(self) -> dict:
        """dict: Current configuration."""
        return self._conf

    def step(self, tick: int):
        """Push business engine to next step.

        Args:
            tick (int): Current tick to process.
        """
        # If we do not set auto event, then we need to push it manually.
        for trip in self._item_picker.items(tick):
            # Generate a trip event to dispatch to the related callback that processes this requirement.
            trip_evt = self._event_buffer.gen_atom_event(
                tick, CitiBikeEvents.RequireBike, payload=trip)

            self._event_buffer.insert_event(trip_evt)

        if self._decision_strategy.is_decision_tick(tick):
            # Generate an event, so that we can do the checking after all the trip requirements are processed.
            decision_checking_evt = self._event_buffer.gen_atom_event(
                tick, CitiBikeEvents.RebalanceBike)

            self._event_buffer.insert_event(decision_checking_evt)

        # Update our additional features that are not trip related.
        self._update_station_extra_features(tick)

    def post_step(self, tick: int):
        # We follow the snapshot_resolution setting to take snapshots.
        if (tick + 1) % self._snapshot_resolution == 0:
            # NOTE: We should use the frame_index method to get the correct index in the snapshot list.
            self._frame.take_snapshot(self.frame_index(tick))

            # We reset the station states each resolution.
            for station in self._stations:
                station.shortage = 0
                station.trip_requirement = 0
                station.extra_cost = 0
                station.transfer_cost = 0
                station.fulfillment = 0
                station.failed_return = 0
                station.min_bikes = station.bikes

        # Stop current episode if we reach max tick.
        return tick + 1 == self._max_tick

    def get_node_mapping(self) -> dict:
        """dict: Node mapping of current stations."""
        node_mapping = {}

        for station in self._stations:
            node_mapping[station.index] = station.id

        return node_mapping

    def get_event_payload_detail(self) -> dict:
        """dict: Event payload details of current scenario."""
        return {
            CitiBikeEvents.RequireBike.name: list(self._trip_reader.meta.columns.keys()),
            CitiBikeEvents.ReturnBike.name: BikeReturnPayload.summary_key,
            CitiBikeEvents.RebalanceBike.name: DecisionEvent.summary_key,
            CitiBikeEvents.DeliverBike.name: BikeTransferPayload.summary_key
        }

    def reset(self, keep_seed: bool = False):
        """Reset internal states for episode."""
        self._total_trips = 0
        self._total_operate_num = 0
        self._total_shortages = 0

        self._frame.reset()

        self._snapshots.reset()

        self._trip_reader.reset()
        self._item_picker = self._trip_reader.items_tick_picker(
            self._start_tick, self._max_tick, time_unit="m")

        for station in self._stations:
            station.reset()

        self._matrices_node.reset()

        self._decision_strategy.reset()

        self._last_date = None

    def get_agent_idx_list(self) -> List[int]:
        """Get a list of agent index.

        Returns:
            list: List of agent index.
        """
        return [station.index for station in self._stations]

    def get_metrics(self) -> DocableDict:
        """Get current metrics information.

        Note:
            Calling this method at different times will give different results.

        Returns:
            dict: Metrics information.
        """
        return DocableDict(
            metrics_desc,
            {
                'trip_requirements': self._total_trips,
                'bike_shortage': self._total_shortages,
                'operation_number': self._total_operate_num
            })

    def __del__(self):
        """Collect resources in order."""
        self._item_picker = None

        if self._trip_reader:
            # Close the binary reader first, so that we can clean it correctly.
            self._trip_reader.close()

    def _init(self):
        self._load_configs()
        self._register_events()
        self._citi_bike_data_pipeline = None

        # Time zone we use to transfer UTC to the target time zone.
        self._time_zone = gettz(self._conf["time_zone"])

        # Our weather table used to query weather by date.
        weather_data_path = self._conf["weather_data"]

        if weather_data_path.startswith("~"):
            weather_data_path = os.path.expanduser(weather_data_path)

        trip_data_path = self._conf["trip_data"]

        if trip_data_path.startswith("~"):
            trip_data_path = os.path.expanduser(trip_data_path)

        if (not os.path.exists(weather_data_path)) or (not os.path.exists(trip_data_path)):
            self._build_temp_data()

        self._weather_lut = WeatherTable(self._conf["weather_data"], self._time_zone)

        self._trip_reader = BinaryReader(self._conf["trip_data"])

        # We keep this to calculate the real datetime for weather and holiday info.
        self._trip_start_date: datetime.datetime = self._trip_reader.start_datetime

        # Since binary data hold UTC timestamps, convert it into our target timezone.
        self._trip_start_date = self._trip_start_date.astimezone(self._time_zone)

        # Cache the last date we updated the station additional features, to avoid spending too much time on updating.
        self._last_date: datetime.datetime = None

        # Filter data with tick range by minute (time_unit='m').
        self._item_picker = self._trip_reader.items_tick_picker(
            self._start_tick, self._max_tick, time_unit="m")

        # We use this to initialize the frame and station states.
        stations_states = get_station_info(self._conf["stations_init_data"])

        self._init_frame(len(stations_states))

        self._init_stations(stations_states)

        self._init_adj_matrix()

        # Our decision strategy to determine when we need an action.
        self._decision_strategy = BikeDecisionStrategy(
            self._stations, self._distance_adj, self._snapshots, self._conf["decision"])

    def _load_configs(self):
        """Load configurations."""
        with open(os.path.join(self._config_path, "config.yml")) as fp:
            self._conf = safe_load(fp)

    def _init_stations(self, stations_states: list):
        # Frame initialization creates the station instances for us; keep a reference.
        # The attributes are added by the frame, exactly as defined in the frame definition.
        # NOTE: This is the built-in station list whose index starts from 0;
        # we need to create a mapping for it, as our trip data only contains ids.
        self._stations = self._frame.stations

        for state in stations_states:
            # Get the related station, and set its initial state.
            station = self._stations[state.index]

            station.set_init_state(state.bikes, state.capacity, state.id)

    def _init_adj_matrix(self):
        # Our distance adjacency matrix. Assume that the adj is NxN without header.
        distance_adj = np.array(
            load_adj_from_csv(self._conf["distance_adj_data"], skiprows=1))

        # We only have one node here.
        self._matrices_node = self._frame.matrices[0]

        station_num = len(self._stations)

        self._distance_adj = distance_adj.reshape(station_num, station_num)

        # Add a wrapper to make it easy to use; with this we can get values by:
        # 1. self._trips_adj[x, y].
        # 2. self._trips_adj.get_row(0).
        # 3. self._trips_adj.get_column(0).
        self._trips_adj = MatrixAttributeAccessor(
            self._matrices_node, "trips_adj", station_num, station_num)

    def _init_frame(self, station_num: int):
        self._frame = build_frame(station_num, self.calc_max_snapshots())

        self._snapshots = self._frame.snapshots

    def _register_events(self):
        # Register our own events and their callback handlers.
        self._event_buffer.register_event_handler(
            CitiBikeEvents.RequireBike, self._on_required_bike)
        self._event_buffer.register_event_handler(
            CitiBikeEvents.ReturnBike, self._on_bike_returned)
        self._event_buffer.register_event_handler(
            CitiBikeEvents.RebalanceBike, self._on_rebalance_bikes)
        self._event_buffer.register_event_handler(
            CitiBikeEvents.DeliverBike, self._on_bike_deliver)

        # Decision event, predefined in event buffer.
        self._event_buffer.register_event_handler(
            MaroEvents.TAKE_ACTION, self._on_action_received)

    def _tick_2_date(self, tick: int):
        # Get the current date to update additional info.
        # NOTE: We do not need hours and minutes for now.
        return (self._trip_start_date + relativedelta(minutes=tick)).date()

    def _update_station_extra_features(self, tick: int):
        """Update features that are not related to trips."""
        cur_datetime = self._tick_2_date(tick)

        if self._last_date == cur_datetime:
            return

        self._last_date = cur_datetime

        weather_info = self._weather_lut[cur_datetime]

        weekday = cur_datetime.weekday()
        holiday = cur_datetime in self._us_holidays

        # Default weather and temperature.
        weather = 0
        temperature = 0

        if weather_info is not None:
            weather = weather_info.weather
            temperature = weather_info.temp

        for station in self._stations:
            station.weekday = weekday
            station.holiday = holiday
            station.weather = weather
            station.temperature = temperature

    def _on_required_bike(self, evt: AtomEvent):
        """Callback when a trip requirement is generated."""
        trip = evt.payload

        station_idx: int = trip.src_station
        station: Station = self._stations[station_idx]
        station_bikes = station.bikes

        # Update trip count; each item only contains 1 requirement.
        station.trip_requirement += 1

        # Statistics for metrics.
        self._total_trips += 1

        self._trips_adj[station_idx, trip.dest_station] += 1

        if station_bikes < 1:
            station.shortage += 1
            self._total_shortages += 1
        else:
            station.fulfillment += 1
            station.bikes = station_bikes - 1

            # Generate a bike return event by end tick.
            return_payload = BikeReturnPayload(station_idx, trip.dest_station, 1)

            # Durations from the csv file are in seconds; they were converted into minutes during pre-processing.
            return_tick = evt.tick + trip.durations

            bike_return_evt = self._event_buffer.gen_atom_event(
                return_tick, CitiBikeEvents.ReturnBike, payload=return_payload)

            self._event_buffer.insert_event(bike_return_evt)

    def _on_bike_returned(self, evt: AtomEvent):
        """Callback when a bike is returned to a station."""
        payload: BikeReturnPayload = evt.payload

        station: Station = self._stations[payload.to_station_idx]

        station_bikes = station.bikes
        return_number = payload.number

        empty_docks = station.capacity - station_bikes

        max_accept_number = min(empty_docks, return_number)

        if max_accept_number < return_number:
            src_station = self._stations[payload.from_station_idx]

            additional_bikes = return_number - max_accept_number

            station.failed_return += additional_bikes

            # We have to move the additional bikes to neighbors.
            self._decision_strategy.move_to_neighbor(
                src_station, station, additional_bikes)

        station.bikes = station_bikes + max_accept_number

    def _on_rebalance_bikes(self, evt: AtomEvent):
        """Callback to check if we should send decision events to the agent."""
        # Get stations that need an action.
        stations_need_decision = self._decision_strategy.get_stations_need_decision(evt.tick)

        if len(stations_need_decision) > 0:
            # Generate a decision event.
            for station_idx, decision_type in stations_need_decision:
                decision_payload = DecisionEvent(
                    station_idx, evt.tick, self.frame_index(evt.tick),
                    self._decision_strategy.action_scope, decision_type)

                decision_evt = self._event_buffer.gen_decision_event(
                    evt.tick, decision_payload)

                self._event_buffer.insert_event(decision_evt)

    def _on_bike_deliver(self, evt: AtomEvent):
        """Callback when our transferred bikes reach the destination."""
        payload: BikeTransferPayload = evt.payload

        station: Station = self._stations[payload.to_station_idx]

        station_bikes = station.bikes
        transferred_number = payload.number

        empty_docks = station.capacity - station_bikes

        max_accept_number = min(empty_docks, transferred_number)

        if max_accept_number < transferred_number:
            src_station = self._stations[payload.from_station_idx]

            self._decision_strategy.move_to_neighbor(
                src_station, station, transferred_number - max_accept_number)

        if max_accept_number > 0:
            station.transfer_cost += max_accept_number

            self._total_operate_num += max_accept_number

        station.bikes = station_bikes + max_accept_number

    def _on_action_received(self, evt: AtomEvent):
        """Callback when we get an action from the agent."""
        action = None

        if evt is None or evt.payload is None:
            return

        for action in evt.payload:
            from_station_idx: int = action.from_station_idx
            to_station_idx: int = action.to_station_idx

            # Ignore invalid cell idx.
            if from_station_idx < 0 or to_station_idx < 0:
                continue

            station: Station = self._stations[from_station_idx]
            station_bikes = station.bikes

            executed_number = min(station_bikes, action.number)

            # Insert into event buffer if we have bikes to transfer.
            if executed_number > 0:
                station.bikes = station_bikes - executed_number

                payload = BikeTransferPayload(
                    from_station_idx, to_station_idx, executed_number)

                transfer_time = self._decision_strategy.transfer_time

                transfer_evt = self._event_buffer.gen_atom_event(
                    evt.tick + transfer_time, CitiBikeEvents.DeliverBike, payload)

                self._event_buffer.insert_event(transfer_evt)

    def _build_temp_data(self):
        """Build temporary data for predefined environment."""
        logger.warning_yellow(
            f"Binary data files for scenario: citi_bike topology: {self._topology} not found.")

        citi_bike_process = CitiBikeProcess(is_temp=True)

        if self._topology in citi_bike_process.topologies:
            pid = str(os.getpid())

            logger.warning_yellow(
                f"Generating temp binary data file for scenario: citi_bike topology: {self._topology} pid: {pid}. "
                "If you want to keep the data, please use MARO CLI command "
                f"'maro env data generate -s citi_bike -t {self._topology}' to generate the binary data files first.")

            self._citi_bike_data_pipeline = citi_bike_process.topologies[self._topology]
            self._citi_bike_data_pipeline.download()
            self._citi_bike_data_pipeline.clean()
            self._citi_bike_data_pipeline.build()

            build_folders = self._citi_bike_data_pipeline.get_build_folders()

            trip_folder = build_folders["trip"]
            weather_folder = build_folders["weather"]

            self._conf["weather_data"] = chagne_file_path(
                self._conf["weather_data"], weather_folder)
            self._conf["trip_data"] = chagne_file_path(
                self._conf["trip_data"], trip_folder)
            self._conf["stations_init_data"] = chagne_file_path(
                self._conf["stations_init_data"], trip_folder)
            self._conf["distance_adj_data"] = chagne_file_path(
                self._conf["distance_adj_data"], trip_folder)
        else:
            raise CommandError(
                "generate",
                f"Can not generate data files for scenario: citi_bike topology: {self._topology}")
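# A minimal sketch of driving this engine through the MARO Env wrapper,
# mirroring the snippet later in this section. The topology name and durations
# are illustrative assumptions; stepping with None simply declines to rebalance.
def _run_citi_bike_no_op() -> None:
    from maro.simulator import Env

    env = Env(scenario="citi_bike", topology="toy.3s_4t",
              start_tick=0, durations=1440, snapshot_resolution=60)

    _, decision_event, is_done = env.step(None)

    while not is_done:
        # A real agent would inspect decision_event.action_scope here and
        # answer with Action(from_station_idx, to_station_idx, number) objects.
        _, decision_event, is_done = env.step(None)

    print(env.metrics)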
class VmSchedulingBusinessEngine(AbsBusinessEngine):
    def __init__(self, event_buffer: EventBuffer, topology: str, start_tick: int,
                 max_tick: int, snapshot_resolution: int, max_snapshots: int,
                 additional_options: dict = {}):
        super().__init__(scenario_name="vm_scheduling", event_buffer=event_buffer,
                         topology=topology, start_tick=start_tick, max_tick=max_tick,
                         snapshot_resolution=snapshot_resolution,
                         max_snapshots=max_snapshots,
                         additional_options=additional_options)

        # Initialize environment metrics.
        self._init_metrics()

        # Load configurations.
        self._load_configs()
        self._register_events()

        self._init_frame()

        # Initialize simulation data.
        self._init_data()

        # PMs list used for quick accessing.
        self._init_pms()

        # All living VMs.
        self._live_vms: Dict[int, VirtualMachine] = {}
        # All request payloads of the pending-decision VMs.
        # NOTE: Needs a naming suggestion.
        self._pending_vm_request_payload: Dict[int, VmRequestPayload] = {}

        self._vm_reader = BinaryReader(self._config.VM_TABLE)
        self._vm_item_picker = self._vm_reader.items_tick_picker(
            self._start_tick, self._max_tick, time_unit="s")

        self._cpu_reader = CpuReader(data_path=self._config.CPU_READINGS,
                                     start_tick=self._start_tick)

        self._tick: int = 0

        self._pending_action_vm_id: int = 0

    @property
    def configs(self) -> dict:
        """dict: Current configuration."""
        return self._config

    @property
    def frame(self) -> FrameBase:
        """FrameBase: Current frame."""
        return self._frame

    @property
    def snapshots(self) -> SnapshotList:
        """SnapshotList: Current snapshot list."""
        return self._snapshots

    def _load_configs(self):
        """Load configurations."""
        # Update self._config_path with current file path.
        self.update_config_root_path(__file__)

        with open(os.path.join(self._config_path, "config.yml")) as fp:
            self._config = convert_dottable(safe_load(fp))

        self._delay_duration: int = self._config.DELAY_DURATION
        self._buffer_time_budget: int = self._config.BUFFER_TIME_BUDGET
        # Oversubscription rates.
        self._max_cpu_oversubscription_rate: float = self._config.MAX_CPU_OVERSUBSCRIPTION_RATE
        self._max_memory_oversubscription_rate: float = self._config.MAX_MEM_OVERSUBSCRIPTION_RATE
        self._max_utilization_rate: float = self._config.MAX_UTILIZATION_RATE
        # Load PM related configs.
        self._pm_amount: int = self._cal_pm_amount()
        self._kill_all_vms_if_overload = self._config.KILL_ALL_VMS_IF_OVERLOAD

    def _init_metrics(self):
        # Env metrics.
        self._total_vm_requests: int = 0
        self._total_energy_consumption: float = 0.0
        self._successful_allocation: int = 0
        self._successful_completion: int = 0
        self._failed_allocation: int = 0
        self._failed_completion: int = 0
        self._total_latency: Latency = Latency()
        self._total_oversubscriptions: int = 0
        self._total_overload_pms: int = 0
        self._total_overload_vms: int = 0

    def _init_data(self):
        """If the file does not exist, trigger the short data pipeline to download the processed data."""
        vm_table_data_path = self._config.VM_TABLE

        if vm_table_data_path.startswith("~"):
            vm_table_data_path = os.path.expanduser(vm_table_data_path)

        cpu_readings_data_path = self._config.CPU_READINGS

        if cpu_readings_data_path.startswith("~"):
            cpu_readings_data_path = os.path.expanduser(cpu_readings_data_path)

        if (not os.path.exists(vm_table_data_path)) or (not os.path.exists(cpu_readings_data_path)):
            logger.info_green("Lack data. Start preparing data.")
Start preparing data.") self._download_processed_data() logger.info_green("Data preparation is finished.") def _cal_pm_amount(self) -> int: amount: int = 0 for pm_type in self._config.PM: amount += pm_type["amount"] return amount def _init_pms(self): """Initialize the physical machines based on the config setting. The PM id starts from 0.""" # TODO: Improve the scalability. Like the use of multiple PM sets. self._machines = self._frame.pms # PM type dictionary. self._pm_type_dict: dict = {} pm_id = 0 for pm_type in self._config.PM: amount = pm_type["amount"] self._pm_type_dict[pm_type["PM_type"]] = pm_type while amount > 0: pm = self._machines[pm_id] pm.set_init_state(id=pm_id, cpu_cores_capacity=pm_type["CPU"], memory_capacity=pm_type["memory"], pm_type=pm_type["PM_type"], oversubscribable=PmState.EMPTY) amount -= 1 pm_id += 1 def reset(self): """Reset internal states for episode.""" self._total_vm_requests: int = 0 self._total_energy_consumption: float = 0.0 self._successful_allocation: int = 0 self._successful_completion: int = 0 self._failed_allocation: int = 0 self._failed_completion: int = 0 self._total_latency: Latency = Latency() self._total_oversubscriptions: int = 0 self._total_overload_pms: int = 0 self._total_overload_vms: int = 0 self._frame.reset() self._snapshots.reset() for pm in self._machines: pm.reset() self._live_vms.clear() self._pending_vm_request_payload.clear() self._vm_reader.reset() self._vm_item_picker = self._vm_reader.items_tick_picker( self._start_tick, self._max_tick, time_unit="s") self._cpu_reader.reset() def _init_frame(self): self._frame = build_frame(self._pm_amount, self.calc_max_snapshots()) self._snapshots = self._frame.snapshots def step(self, tick: int): """Push business to next step. Args: tick (int): Current tick to process. """ self._tick = tick # All vm's cpu utilization at current tick. cur_tick_cpu_utilization = self._cpu_reader.items(tick=tick) # Process finished VMs. self._process_finished_vm() # Update all live VMs CPU utilization. self._update_vm_workload( cur_tick_cpu_utilization=cur_tick_cpu_utilization) # Update all PM CPU utilization. self._update_pm_workload() for vm in self._vm_item_picker.items(tick): # TODO: Batch request support. vm_info = VirtualMachine(id=vm.vm_id, cpu_cores_requirement=vm.vm_cpu_cores, memory_requirement=vm.vm_memory, lifetime=vm.vm_lifetime, sub_id=vm.sub_id, deployment_id=vm.deploy_id, category=VmCategory(vm.vm_category)) if vm.vm_id not in cur_tick_cpu_utilization: raise Exception( f"The VM id: '{vm.vm_id}' does not exist at this tick.") vm_info.add_utilization( cpu_utilization=cur_tick_cpu_utilization[vm.vm_id]) vm_req_payload: VmRequestPayload = VmRequestPayload( vm_info=vm_info, remaining_buffer_time=self._buffer_time_budget) vm_request_event = self._event_buffer.gen_cascade_event( tick=tick, event_type=Events.REQUEST, payload=vm_req_payload) self._event_buffer.insert_event(event=vm_request_event) self._total_vm_requests += 1 def post_step(self, tick: int): # Update energy to the environment metrices. total_energy: float = 0.0 for pm in self._machines: if pm.oversubscribable and pm.cpu_cores_allocated > pm.cpu_cores_capacity: self._total_oversubscriptions += 1 total_energy += pm.energy_consumption # Overload PMs. if pm.cpu_utilization > 100: self._overload(pm.id) self._total_energy_consumption += total_energy if (tick + 1) % self._snapshot_resolution == 0: # NOTE: We should use frame_index method to get correct index in snapshot list. 
            self._frame.take_snapshot(self.frame_index(tick))

        # Stop current episode if we reach max tick.
        return tick + 1 >= self._max_tick

    def get_event_payload_detail(self) -> dict:
        """dict: Event payload details of current scenario."""
        return {
            Events.REQUEST.name: VmRequestPayload.summary_key,
            MaroEvents.PENDING_DECISION.name: DecisionPayload.summary_key
        }

    def get_agent_idx_list(self) -> List[int]:
        """Get a list of agent index."""
        pass

    def get_node_mapping(self) -> dict:
        """dict: Node mapping."""
        node_mapping = {}
        return node_mapping

    def get_vm_cpu_utilization_series(self, vm_id: int) -> List[float]:
        """Get the CPU utilization series of the specific VM by the given ID."""
        if vm_id in self._live_vms:
            return self._live_vms[vm_id].get_historical_utilization_series(cur_tick=self._tick)

        return []

    def get_metrics(self) -> DocableDict:
        """Get current environment metrics information.

        Returns:
            DocableDict: Metrics information.
        """
        return DocableDict(
            metrics_desc,
            total_vm_requests=self._total_vm_requests,
            total_energy_consumption=self._total_energy_consumption,
            successful_allocation=self._successful_allocation,
            successful_completion=self._successful_completion,
            failed_allocation=self._failed_allocation,
            failed_completion=self._failed_completion,
            total_latency=self._total_latency,
            total_oversubscriptions=self._total_oversubscriptions,
            total_overload_pms=self._total_overload_pms,
            total_overload_vms=self._total_overload_vms)

    def _register_events(self):
        # Register our own events and their callback handlers.
        self._event_buffer.register_event_handler(
            event_type=Events.REQUEST, handler=self._on_vm_required)

        # Generate decision event.
        self._event_buffer.register_event_handler(
            event_type=MaroEvents.TAKE_ACTION, handler=self._on_action_received)

    def _update_vm_workload(self, cur_tick_cpu_utilization: dict):
        """Update all live VMs' CPU utilization.

        The lengths of the VMs' utilization series can differ among VMs,
        because index 0 represents a VM's CPU utilization at the tick it starts.
        """
        for live_vm in self._live_vms.values():
            # NOTE: Some data could be lost. We use -1.0 to represent missing data.
            if live_vm.id not in cur_tick_cpu_utilization:
                live_vm.add_utilization(cpu_utilization=-1.0)
            else:
                live_vm.add_utilization(
                    cpu_utilization=cur_tick_cpu_utilization[live_vm.id])
                live_vm.cpu_utilization = live_vm.get_utilization(cur_tick=self._tick)

        for pending_vm_payload in self._pending_vm_request_payload.values():
            pending_vm = pending_vm_payload.vm_info
            if pending_vm.id not in cur_tick_cpu_utilization:
                pending_vm.add_utilization(cpu_utilization=-1.0)
            else:
                pending_vm.add_utilization(
                    cpu_utilization=cur_tick_cpu_utilization[pending_vm.id])

    def _update_pm_workload(self):
        """Update the CPU utilization occupied by all VMs on each PM."""
        for pm in self._machines:
            total_pm_cpu_cores_used: float = 0.0
            for vm_id in pm.live_vms:
                vm = self._live_vms[vm_id]
                total_pm_cpu_cores_used += vm.cpu_utilization * vm.cpu_cores_requirement

            pm.update_cpu_utilization(
                vm=None,
                cpu_utilization=total_pm_cpu_cores_used / pm.cpu_cores_capacity)

            pm.energy_consumption = self._cpu_utilization_to_energy_consumption(
                pm_type=self._pm_type_dict[pm.pm_type],
                cpu_utilization=pm.cpu_utilization)

    def _overload(self, pm_id: int):
        """Overload logic.

        Currently we only support killing all VMs on the overloaded PM,
        counting them as failed completions.
        """
        # TODO: Future features of overload modeling:
        # 1. Performance degradation.
        # 2. Quiesce specific VMs.
        pm: PhysicalMachine = self._machines[pm_id]
        vm_ids: List[int] = [vm_id for vm_id in pm.live_vms]

        if self._kill_all_vms_if_overload:
            for vm_id in vm_ids:
                self._live_vms.pop(vm_id)

            pm.deallocate_vms(vm_ids=vm_ids)
            self._failed_completion += len(vm_ids)

        self._total_overload_vms += len(vm_ids)

    def _cpu_utilization_to_energy_consumption(self, pm_type: dict, cpu_utilization: float) -> float:
        """Convert the CPU utilization to energy consumption.

        The formulation refers to https://dl.acm.org/doi/epdf/10.1145/1273440.1250665
        """
        power: float = pm_type["power_curve"]["calibration_parameter"]
        busy_power: int = pm_type["power_curve"]["busy_power"]
        idle_power: int = pm_type["power_curve"]["idle_power"]

        cpu_utilization /= 100
        cpu_utilization = min(1, cpu_utilization)

        # Energy model: idle_power + (busy_power - idle_power) * (2u - u^r), with
        # u the utilization in [0, 1] and r the calibration parameter. For example
        # (made-up numbers): idle 100W, busy 200W, r = 1.4, u = 0.5 gives
        # 100 + 100 * (1.0 - 0.5 ** 1.4) ≈ 162W.
        return idle_power + (busy_power - idle_power) * (
            2 * cpu_utilization - pow(cpu_utilization, power))

    def _postpone_vm_request(self, postpone_type: PostponeType, vm_id: int, remaining_buffer_time: int):
        """Postpone VM request."""
        if remaining_buffer_time >= self._delay_duration:
            if postpone_type == PostponeType.Resource:
                self._total_latency.due_to_resource += self._delay_duration
            elif postpone_type == PostponeType.Agent:
                self._total_latency.due_to_agent += self._delay_duration

            postpone_payload = self._pending_vm_request_payload[vm_id]
            postpone_payload.remaining_buffer_time -= self._delay_duration

            postpone_event = self._event_buffer.gen_cascade_event(
                tick=self._tick + self._delay_duration,
                event_type=Events.REQUEST,
                payload=postpone_payload)

            self._event_buffer.insert_event(event=postpone_event)
        else:
            # Fail.
            # Pop out the VM request payload.
            self._pending_vm_request_payload.pop(vm_id)
            # Add a failed allocation.
            self._failed_allocation += 1

    def _get_valid_pms(self, vm_cpu_cores_requirement: int, vm_memory_requirement: int,
                       vm_category: VmCategory) -> List[int]:
        """Check all valid PMs.

        Args:
            vm_cpu_cores_requirement (int): The CPU cores requested by the VM.
            vm_memory_requirement (int): The memory requested by the VM.
            vm_category (VmCategory): The VM category. Delay-insensitive: 0, Interactive: 1, Unknown: 2.
        """
        # NOTE: Should we implement this logic inside the action scope?
        valid_pm_list = []

        # Delay-insensitive: 0, Interactive: 1, and Unknown: 2.
        if vm_category == VmCategory.INTERACTIVE or vm_category == VmCategory.UNKNOWN:
            valid_pm_list = self._get_valid_non_oversubscribable_pms(
                vm_cpu_cores_requirement=vm_cpu_cores_requirement,
                vm_memory_requirement=vm_memory_requirement)
        else:
            valid_pm_list = self._get_valid_oversubscribable_pms(
                vm_cpu_cores_requirement=vm_cpu_cores_requirement,
                vm_memory_requirement=vm_memory_requirement)

        return valid_pm_list

    def _get_valid_non_oversubscribable_pms(self, vm_cpu_cores_requirement: int,
                                            vm_memory_requirement: int) -> list:
        valid_pm_list = []

        for pm in self._machines:
            if pm.oversubscribable == PmState.EMPTY or pm.oversubscribable == PmState.NON_OVERSUBSCRIBABLE:
                # In the condition of non-oversubscription, the valid PMs mean:
                # PM allocated resource + VM allocated resource <= PM capacity.
                if (pm.cpu_cores_allocated + vm_cpu_cores_requirement <= pm.cpu_cores_capacity
                        and pm.memory_allocated + vm_memory_requirement <= pm.memory_capacity):
                    valid_pm_list.append(pm.id)

        return valid_pm_list

    def _get_valid_oversubscribable_pms(self, vm_cpu_cores_requirement: int,
                                        vm_memory_requirement: int) -> List[int]:
        valid_pm_list = []

        for pm in self._machines:
            if pm.oversubscribable == PmState.EMPTY or pm.oversubscribable == PmState.OVERSUBSCRIBABLE:
                # In the condition of oversubscription, the valid PMs mean:
                # 1. PM allocated resource + VM allocated resource <= Max oversubscription rate * PM capacity.
                # 2. PM CPU usage + VM requirements <= Max utilization rate * PM capacity.
                if ((pm.cpu_cores_allocated + vm_cpu_cores_requirement
                        <= self._max_cpu_oversubscription_rate * pm.cpu_cores_capacity)
                        and (pm.memory_allocated + vm_memory_requirement
                             <= self._max_memory_oversubscription_rate * pm.memory_capacity)
                        and (pm.cpu_utilization / 100 * pm.cpu_cores_capacity + vm_cpu_cores_requirement
                             <= self._max_utilization_rate * pm.cpu_cores_capacity)):
                    valid_pm_list.append(pm.id)

        return valid_pm_list

    def _process_finished_vm(self):
        """Release PM resources from finished VMs."""
        # Get the VM info.
        vm_id_list = []

        for vm in self._live_vms.values():
            if vm.deletion_tick == self._tick:
                # Release PM resources.
                pm: PhysicalMachine = self._machines[vm.pm_id]
                pm.cpu_cores_allocated -= vm.cpu_cores_requirement
                pm.memory_allocated -= vm.memory_requirement
                pm.deallocate_vms(vm_ids=[vm.id])
                # If the VM list is empty, switch the state to empty.
                if not pm.live_vms:
                    pm.oversubscribable = PmState.EMPTY

                vm_id_list.append(vm.id)
                # The VM completed its task successfully.
                self._successful_completion += 1

        # Remove dead VMs.
        for vm_id in vm_id_list:
            self._live_vms.pop(vm_id)

    def _on_vm_required(self, vm_request_event: CascadeEvent):
        """Callback when there is a VM request generated."""
        # Get VM data from payload.
        payload: VmRequestPayload = vm_request_event.payload

        vm_info: VirtualMachine = payload.vm_info
        remaining_buffer_time: int = payload.remaining_buffer_time

        # Store the payload inside the business engine.
        self._pending_vm_request_payload[vm_info.id] = payload

        # Get valid PM list.
        valid_pm_list = self._get_valid_pms(
            vm_cpu_cores_requirement=vm_info.cpu_cores_requirement,
            vm_memory_requirement=vm_info.memory_requirement,
            vm_category=vm_info.category)

        if len(valid_pm_list) > 0:
            # Generate pending decision.
            decision_payload = DecisionPayload(
                frame_index=self.frame_index(tick=self._tick),
                valid_pms=valid_pm_list,
                vm_id=vm_info.id,
                vm_cpu_cores_requirement=vm_info.cpu_cores_requirement,
                vm_memory_requirement=vm_info.memory_requirement,
                remaining_buffer_time=remaining_buffer_time)

            self._pending_action_vm_id = vm_info.id

            pending_decision_event = self._event_buffer.gen_decision_event(
                tick=vm_request_event.tick, payload=decision_payload)

            vm_request_event.add_immediate_event(event=pending_decision_event)
        else:
            # Either postpone the request event or fail it.
            self._postpone_vm_request(
                postpone_type=PostponeType.Resource,
                vm_id=vm_info.id,
                remaining_buffer_time=remaining_buffer_time)

    def _on_action_received(self, event: CascadeEvent):
        """Callback when we get an action from the agent."""
        action = None

        if event is None or event.payload is None:
            self._pending_vm_request_payload.pop(self._pending_action_vm_id)

            return

        cur_tick: int = event.tick

        for action in event.payload:
            vm_id: int = action.vm_id

            if vm_id not in self._pending_vm_request_payload:
                raise Exception(f"The VM id: '{vm_id}' sent by the agent is invalid.")

            if type(action) == AllocateAction:
                pm_id = action.pm_id
                vm: VirtualMachine = self._pending_vm_request_payload[vm_id].vm_info
                lifetime = vm.lifetime

                # Update VM information.
                vm.pm_id = pm_id
                vm.creation_tick = cur_tick
                vm.deletion_tick = cur_tick + lifetime
                vm.cpu_utilization = vm.get_utilization(cur_tick=cur_tick)

                # Pop out the VM from pending requests and add it to the live VM dict.
                self._pending_vm_request_payload.pop(vm_id)
                self._live_vms[vm_id] = vm

                # Update PM resources requested by the VM.
                pm = self._machines[pm_id]

                # Empty PM (initial state).
                if pm.oversubscribable == PmState.EMPTY:
                    # Delay-insensitive: oversubscribable.
                    if vm.category == VmCategory.DELAY_INSENSITIVE:
                        pm.oversubscribable = PmState.OVERSUBSCRIBABLE
                    # Interactive or Unknown: non-oversubscribable.
                    else:
                        pm.oversubscribable = PmState.NON_OVERSUBSCRIBABLE

                pm.allocate_vms(vm_ids=[vm.id])
                pm.cpu_cores_allocated += vm.cpu_cores_requirement
                pm.memory_allocated += vm.memory_requirement
                pm.update_cpu_utilization(vm=vm, cpu_utilization=None)
                pm.energy_consumption = self._cpu_utilization_to_energy_consumption(
                    pm_type=self._pm_type_dict[pm.pm_type],
                    cpu_utilization=pm.cpu_utilization)
                self._successful_allocation += 1
            elif type(action) == PostponeAction:
                postpone_step = action.postpone_step
                remaining_buffer_time = self._pending_vm_request_payload[vm_id].remaining_buffer_time
                # Either postpone the request event or fail it.
                self._postpone_vm_request(
                    postpone_type=PostponeType.Agent,
                    vm_id=vm_id,
                    remaining_buffer_time=remaining_buffer_time - postpone_step * self._delay_duration)

    def _download_processed_data(self):
        """Build processed data."""
        data_root = StaticParameter.data_root
        build_folder = os.path.join(data_root, self._scenario_name, ".build", self._topology)

        source = self._config.PROCESSED_DATA_URL
        download_file_name = source.split('/')[-1]
        download_file_path = os.path.join(build_folder, download_file_name)

        # Download file from the Azure blob storage.
        if not os.path.exists(download_file_path):
            logger.info_green(f"Downloading data from {source} to {download_file_path}.")
            download_file(source=source, destination=download_file_path)
        else:
            logger.info_green("File already exists, skipping download.")

        # Unzip files.
        logger.info_green(f"Unzip {download_file_path} to {build_folder}")
        tar = tarfile.open(download_file_path, "r:gz")
        tar.extractall(path=build_folder)
        tar.close()

        # Move to the correct path.
        for _, directories, _ in os.walk(build_folder):
            for directory in directories:
                unzip_file = os.path.join(build_folder, directory)
                logger.info_green(f"Move files to {build_folder} from {unzip_file}")
                for file_name in os.listdir(unzip_file):
                    if file_name.endswith(".bin"):
                        shutil.move(os.path.join(unzip_file, file_name), build_folder)

                os.rmdir(unzip_file)
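# A minimal sketch of exercising this engine through the MARO Env wrapper. The
# topology name and duration are illustrative assumptions; the policy below is
# a naive first-fit that picks the first valid PM from each decision payload.
def _run_vm_scheduling_first_fit() -> None:
    from maro.simulator import Env

    env = Env(scenario="vm_scheduling", topology="azure.2019.10k",
              start_tick=0, durations=8638, snapshot_resolution=1)

    metrics, decision_payload, is_done = env.step(None)

    while not is_done:
        action = AllocateAction(
            vm_id=decision_payload.vm_id,
            pm_id=decision_payload.valid_pms[0])
        metrics, decision_payload, is_done = env.step(action)

    print(metrics)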
def test_convert_with_events(self):
    out_dir = tempfile.mkdtemp()

    out_bin = os.path.join(out_dir, "trips.bin")

    meta_file = os.path.join("tests", "data", "data_lib", "case_1", "meta.yml")
    csv_file = os.path.join("tests", "data", "data_lib", "trips.csv")

    bct = BinaryConverter(out_bin, meta_file)

    # add and convert 1st csv file
    bct.add_csv(csv_file)

    # adding again will append to the end, ignoring the order
    bct.add_csv(csv_file)

    # flush will close the file, cannot add again
    bct.flush()

    # check if output exists
    self.assertTrue(os.path.exists(out_bin))

    # check content
    reader = BinaryReader(out_bin)

    # start tick should be the smallest one
    start_date = reader.start_datetime

    self.assertEqual(start_date.year, 2019)
    self.assertEqual(start_date.month, 1)
    self.assertEqual(start_date.day, 1)
    self.assertEqual(start_date.hour, 0)
    self.assertEqual(start_date.minute, 0)
    self.assertEqual(start_date.second, 0)

    end_date = reader.end_datetime

    self.assertEqual(end_date.year, 2019)
    self.assertEqual(end_date.month, 1)
    self.assertEqual(end_date.day, 1)
    self.assertEqual(end_date.hour, 0)
    self.assertEqual(end_date.minute, 5)
    self.assertEqual(end_date.second, 0)

    # there should be twice as many items as in trips.csv
    self.assertEqual(4 * 2, reader.header.item_count)

    # 20 bytes
    self.assertEqual(20, reader.header.item_size)

    start_station_index = [0, 0, 1, 0]

    idx = 0

    # check the iterating interface
    for item in reader.items():
        # check if fields are the same as in meta
        self.assertTupleEqual(
            ('timestamp', 'durations', 'src_station', 'dest_station'),
            item._fields)

        # check the item's start station index
        self.assertEqual(start_station_index[idx % len(start_station_index)], item.src_station)

        idx += 1

    # check if the filter works as expected
    item_number = len([item for item in reader.items(end_time_offset=0, time_unit="m")])

    # although there are 2 items that match the condition, they are not sorted;
    # the reader will not read to the end, only up to the first item that does not match the condition
    self.assertEqual(1, item_number)

    item_number = len([item for item in reader.items(start_time_offset=1, time_unit='m')])

    # the reader will read up to the 1st item that is beyond the end tick, so there should be 6 items
    self.assertEqual(6, item_number)
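# A small sketch of the offset filtering exercised above (hypothetical helper):
# offsets are relative to reader.start_datetime in the given time unit, so this
# yields the items whose timestamps fall within the first two minutes, assuming
# the bounds behave inclusively as the assertions above suggest.
def _items_in_first_two_minutes(bin_path: str) -> list:
    reader = BinaryReader(bin_path)
    return [item for item in reader.items(
        start_time_offset=0, end_time_offset=1, time_unit="m")]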
# Init an environment for Citi Bike.
env = Env(
    scenario=config.env.scenario,
    topology=config.env.topology,
    start_tick=config.env.start_tick,
    durations=config.env.durations,
    snapshot_resolution=config.env.resolution,
)

# For debug only, used to peep the BE to get the real future data.
if PEEP_AND_USE_REAL_DATA:
    ENV = env
    TRIP_PICKER = BinaryReader(env.configs["trip_data"]).items_tick_picker(
        start_time_offset=config.env.start_tick,
        end_time_offset=(config.env.start_tick + config.env.durations),
        time_unit="m"
    )

if config.env.seed is not None:
    env.set_seed(config.env.seed)

# Start simulation.
decision_event: DecisionEvent = None
action: Action = None
is_done: bool = False

_, decision_event, is_done = env.step(action=None)

# TODO: Update the Env interface.
num_station = len(env.agent_idx_list)
station_distance_adj = np.array(
class IlpAgent():
    def __init__(
        self,
        ilp_config: DottableDict,
        pm_capacity: np.ndarray,
        vm_table_path: str,
        env_start_tick: int,
        env_duration: int,
        simulation_logger: Logger,
        ilp_logger: Logger,
        log_path: str
    ):
        self._simulation_logger = simulation_logger
        self._ilp_logger = ilp_logger

        self._allocation_counter = Counter()

        pm_capacity: List[IlpPmCapacity] = [IlpPmCapacity(core=pm[0], mem=pm[1]) for pm in pm_capacity]
        self.ilp = VmSchedulingILP(config=ilp_config, pm_capacity=pm_capacity, logger=ilp_logger, log_path=log_path)
        self.ilp_plan_window_size = ilp_config.plan_window_size
        self.ilp_apply_buffer_size = ilp_config.apply_buffer_size

        # Use the vm_item_picker to get the precise VM request info.
        self.vm_reader = BinaryReader(vm_table_path)
        self.vm_item_picker = self.vm_reader.items_tick_picker(
            env_start_tick,
            env_start_tick + env_duration,
            time_unit="s"
        )

        # Used to keep the info already read from the vm_item_picker.
        self.vm_req_dict = defaultdict(list)
        self.env_tick_in_vm_req_dict = []
        self.allocated_vm_dict = {}
        self.refreshed_allocated_vm_dict = {}

        self.last_solution_env_tick = -1

        self._vm_id_to_idx = {}
        self.future_vm_req: List[IlpVmInfo] = []
        self.allocated_vm: List[IlpVmInfo] = []

    def choose_action(self, env_tick: int, cur_vm_id: int, live_vm_set_list: List[Set[int]]) -> Action:
        # Formulate and solve only when the new request goes beyond the apply buffer size of the last ILP solution.
        if self.last_solution_env_tick < 0 or env_tick >= self.last_solution_env_tick + self.ilp_apply_buffer_size:
            self.last_solution_env_tick = env_tick
            self._vm_id_to_idx = {}
            self.future_vm_req.clear()
            self.allocated_vm.clear()

            # Clear the outdated vm_req_dict data.
            pop_num = 0
            for i, tick in enumerate(self.env_tick_in_vm_req_dict):
                if tick < env_tick:
                    self.vm_req_dict.pop(tick)
                    pop_num += 1
                else:
                    break

            self.env_tick_in_vm_req_dict = self.env_tick_in_vm_req_dict[pop_num:]

            # Read VM data from file.
            for tick in range(env_tick, env_tick + self.ilp_plan_window_size + 1):
                if tick not in self.vm_req_dict:
                    self.env_tick_in_vm_req_dict.append(tick)
                    self.vm_req_dict[tick] = [item for item in self.vm_item_picker.items(tick)]

            # Build the future_vm_req list for ILP.
            for tick in range(env_tick, env_tick + self.ilp_plan_window_size + 1):
                for vm in self.vm_req_dict[tick]:
                    vmInfo = IlpVmInfo(
                        id=vm.vm_id,
                        core=vm.vm_cpu_cores,
                        mem=vm.vm_memory,
                        lifetime=vm.vm_lifetime,
                        arrival_env_tick=tick
                    )
                    if tick < env_tick + self.ilp_apply_buffer_size:
                        self.refreshed_allocated_vm_dict[vm.vm_id] = vmInfo

                    self._vm_id_to_idx[vm.vm_id] = len(self.future_vm_req)
                    self.future_vm_req.append(vmInfo)

            # Build the allocated_vm list for ILP.
            for pm_idx in range(len(live_vm_set_list)):
                for vm_id in live_vm_set_list[pm_idx]:
                    assert vm_id in self.allocated_vm_dict, f"ILP agent: vm_id {vm_id} not in allocated_vm_dict"
                    vm = self.allocated_vm_dict[vm_id]
                    vm.pm_idx = pm_idx

                    self.refreshed_allocated_vm_dict[vm_id] = vm
                    self.allocated_vm.append(vm)

            self.allocated_vm_dict.clear()
            self.allocated_vm_dict = self.refreshed_allocated_vm_dict
            self.refreshed_allocated_vm_dict = {}

        # Choose an action by ILP. This may trigger a new formulation and solution,
        # or directly return the decision if cur_vm_id is still within the apply buffer size of the last solution.
        chosen_pm_idx = self.ilp.choose_pm(env_tick, cur_vm_id, self.allocated_vm, self.future_vm_req, self._vm_id_to_idx)
        self._simulation_logger.info(f"tick: {env_tick}, vm: {cur_vm_id} -> pm: {chosen_pm_idx}")

        if chosen_pm_idx == NOT_ALLOCATE_NOW:
            return PostponeAction(vm_id=cur_vm_id, postpone_step=1)
        else:
            self._allocation_counter[self.future_vm_req[self._vm_id_to_idx[cur_vm_id]].core] += 1
            return AllocateAction(vm_id=cur_vm_id, pm_id=chosen_pm_idx)

    def report_allocation_summary(self):
        self._simulation_logger.info(f"Allocation Counter(#core, #req): {sorted(self._allocation_counter.items())}")
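# A usage sketch under assumed wiring (the config, capacity array, loggers and
# the existence of a VM with id 1 arriving at tick 0 are all illustrative
# assumptions): the agent is built once, then queried per decision event.
def _demo_ilp_agent(ilp_config: DottableDict, pm_capacity: np.ndarray,
                    vm_table_path: str, simulation_logger: Logger,
                    ilp_logger: Logger, log_path: str) -> None:
    agent = IlpAgent(
        ilp_config=ilp_config, pm_capacity=pm_capacity,
        vm_table_path=vm_table_path, env_start_tick=0, env_duration=100,
        simulation_logger=simulation_logger, ilp_logger=ilp_logger,
        log_path=log_path)

    # One decision round: no VM is live yet on either of two PMs.
    action = agent.choose_action(env_tick=0, cur_vm_id=1, live_vm_set_list=[set(), set()])
    print(type(action).__name__)

    agent.report_allocation_summary()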
class CitibikeBusinessEngine(AbsBusinessEngine):
    def __init__(self, event_buffer: EventBuffer, topology: str, start_tick: int,
                 max_tick: int, snapshot_resolution: int, max_snapshots: int,
                 additional_options: dict = {}):
        super().__init__("citi_bike", event_buffer, topology, start_tick,
                         max_tick, snapshot_resolution, max_snapshots,
                         additional_options)

        # trip binary reader
        self._trip_reader: BinaryReader = None

        # update self._config_path with current file path
        self.update_config_root_path(__file__)

        # holidays for US, as we are using NY data
        self._us_holidays = holidays.US()

        # our stations list used for quick accessing
        self._stations: List[Station] = []

        self._total_trips: int = 0
        self._total_shortages: int = 0

        self._init()

    @property
    def frame(self) -> FrameBase:
        """Current frame"""
        return self._frame

    @property
    def snapshots(self) -> SnapshotList:
        """Current snapshot list"""
        return self._snapshots

    @property
    def configs(self):
        return self._conf

    def rewards(self, actions) -> Union[float, list]:
        """Calculate rewards based on actions

        Args:
            actions(list): Action(s) from agent

        Returns:
            float: reward based on actions
        """
        if actions is None:
            return []

        return sum(
            [self._reward.reward(station.index) for station in self._stations])

    def step(self, tick: int):
        """Push business engine to next step"""
        # if we do not set auto event, then we need to push it manually
        for trip in self._item_picker.items(tick):
            # generate a trip event to dispatch to the related callback that processes this requirement
            trip_evt = self._event_buffer.gen_atom_event(
                tick, CitiBikeEvents.RequireBike, payload=trip)

            self._event_buffer.insert_event(trip_evt)

        if self._decision_strategy.is_decision_tick(tick):
            # generate an event, so that we can do the checking after all the trip requirements are processed
            decision_checking_evt = self._event_buffer.gen_atom_event(
                tick, CitiBikeEvents.RebalanceBike)

            self._event_buffer.insert_event(decision_checking_evt)

        # update our additional features that are not trip related
        self._update_station_extra_features(tick)

    def post_step(self, tick: int):
        # we follow the snapshot_resolution setting to take snapshots
        if (tick + 1) % self._snapshot_resolution == 0:
            # NOTE: we should use the frame_index method to get the correct index in the snapshot list
            self._frame.take_snapshot(self.frame_index(tick))

            # we reset the station states each resolution
            for station in self._stations:
                station.shortage = 0
                station.trip_requirement = 0
                station.extra_cost = 0
                station.transfer_cost = 0
                station.fulfillment = 0
                station.failed_return = 0
                station.min_bikes = station.bikes

        # stop current episode if we reach max tick
        return tick + 1 == self._max_tick

    def get_node_mapping(self) -> dict:
        return {}

    def reset(self):
        """Reset after episode"""
        self._total_trips = 0
        self._total_shortages = 0

        self._frame.reset()

        self._snapshots.reset()

        self._trip_reader.reset()
        self._item_picker = self._trip_reader.items_tick_picker(
            self._start_tick, self._max_tick, time_unit="m")

        for station in self._stations:
            station.reset()

        self._matrices_node.reset()

    def get_agent_idx_list(self) -> List[int]:
        return [station.index for station in self._stations]

    def get_metrics(self) -> dict:
        """metrics information"""
        total_trips = self._total_trips
        total_shortage = self._total_shortages

        return DocableDict(
            metrics_desc,
            perf=(total_trips - total_shortage) / total_trips if total_trips != 0 else 1,
            total_trips=total_trips,
            total_shortage=total_shortage)

    def __del__(self):
        """Collect resource by order"""
        self._item_picker = None

        if self._trip_reader:
            # close the binary reader first, so that we can clean it correctly
            self._trip_reader.close()

    def _init(self):
        self._load_configs()
        self._register_events()
        self._citi_bike_data_pipeline = None

        # time zone we use to transfer UTC to the target time zone
        self._time_zone = gettz(self._conf["time_zone"])

        # our weather table used to query weather by date
        weather_data_path = self._conf["weather_data"]

        if weather_data_path.startswith("~"):
            weather_data_path = os.path.expanduser(weather_data_path)

        trip_data_path = self._conf["trip_data"]

        if trip_data_path.startswith("~"):
            trip_data_path = os.path.expanduser(trip_data_path)

        if (not os.path.exists(weather_data_path)) or (not os.path.exists(trip_data_path)):
            self._build_temp_data()

        self._weather_lut = WeatherTable(self._conf["weather_data"], self._time_zone)

        self._trip_reader = BinaryReader(self._conf["trip_data"])

        # we keep this to calculate the real datetime for weather and holiday info
        self._trip_start_date: datetime.datetime = self._trip_reader.start_datetime

        # since binary data hold UTC timestamps, convert it into our target timezone
        self._trip_start_date = self._trip_start_date.astimezone(self._time_zone)

        # cache the last date we updated the station additional features, to avoid spending too much time on updating
        self._last_date: datetime.datetime = None

        # filter data with tick range by minute (time_unit='m')
        self._item_picker = self._trip_reader.items_tick_picker(
            self._start_tick, self._max_tick, time_unit="m")

        # we use this to init the frame and the stations' init states
        stations_states = get_station_info(self._conf["stations_init_data"])

        self._init_frame(len(stations_states))

        self._init_stations(stations_states)

        self._init_adj_matrix()

        # our decision strategy to determine when we need an action
        self._decision_strategy = BikeDecisionStrategy(
            self._stations, self._distance_adj, self._snapshots, self._conf["decision"])

        self._reward = StationReward(self._stations, self._conf["reward"])

    def _load_configs(self):
        """Load configurations"""
        with open(os.path.join(self._config_path, "config.yml")) as fp:
            self._conf = safe_load(fp)

    def _init_stations(self, stations_states: list):
        # frame initialization creates the station instances for us; keep a reference
        # the attributes are added by the frame, exactly as defined in the frame definition
        # NOTE: this is the built-in station list whose index starts from 0;
        # we need to create a mapping for it, as our trip data only contains ids
        self._stations = self._frame.stations

        for state in stations_states:
            # get the related station, and set its init states
            station = self._stations[state.index]

            station.set_init_state(state.bikes, state.capacity)

    def _init_adj_matrix(self):
        # our distance adj; we assume that the adj is NxN without header
        distance_adj = np.array(
            load_adj_from_csv(self._conf["distance_adj_data"], skiprows=1))

        # we only have one node here
        self._matrices_node = self._frame.matrices[0]

        station_num = len(self._stations)

        self._distance_adj = distance_adj.reshape(station_num, station_num)

        # add a wrapper to it to make it easy to use,
        # with this we can get values by:
        # 1. self._trips_adj[x, y]
        # 2. self._trips_adj.get_row(0)
        # 3. self._trips_adj.get_column(0)
        self._trips_adj = MatrixAttributeAccessor(
            self._matrices_node, "trips_adj", station_num, station_num)

    def _init_frame(self, station_num: int):
        # TODO: read the station number later
        self._frame = build_frame(station_num, self.calc_max_snapshots())
        self._snapshots = self._frame.snapshots

    def _register_events(self):
        # register our own events and their callback handlers
        self._event_buffer.register_event_handler(
            CitiBikeEvents.RequireBike, self._on_required_bike)
        self._event_buffer.register_event_handler(
            CitiBikeEvents.ReturnBike, self._on_bike_returned)
        self._event_buffer.register_event_handler(
            CitiBikeEvents.RebalanceBike, self._on_rebalance_bikes)
        self._event_buffer.register_event_handler(
            CitiBikeEvents.DeliverBike, self._on_bike_deliver)

        # decision event, predefined in event buffer
        self._event_buffer.register_event_handler(
            DECISION_EVENT, self._on_action_received)

    def _tick_2_date(self, tick: int):
        # get the current date to update additional info
        # NOTE: we do not need hours and minutes for now
        return (self._trip_start_date + relativedelta(minutes=tick)).date()

    def _update_station_extra_features(self, tick: int):
        """update features that are not related to trips"""
        cur_datetime = self._tick_2_date(tick)

        if self._last_date == cur_datetime:
            return

        self._last_date = cur_datetime

        weather_info = self._weather_lut[cur_datetime]

        weekday = cur_datetime.weekday()
        holiday = cur_datetime in self._us_holidays

        # default weather and temperature
        weather = 0
        temperature = 0

        if weather_info is not None:
            weather = weather_info.weather
            temperature = weather_info.temp

        for station in self._stations:
            station.weekday = weekday
            station.holiday = holiday
            station.weather = weather
            station.temperature = temperature

    def _on_required_bike(self, evt: Event):
        """callback when a trip requirement is generated"""
        trip = evt.payload

        station_idx: int = trip.src_station
        station: Station = self._stations[station_idx]
        station_bikes = station.bikes

        # update trip count; each item only contains 1 requirement
        station.trip_requirement += 1

        # statistics for metrics
        self._total_trips += 1

        self._trips_adj[station_idx, trip.dest_station] += 1

        if station_bikes < 1:
            station.shortage += 1
            self._total_shortages += 1
        else:
            station.fulfillment += 1
            station.bikes = station_bikes - 1

            # generate a bike return event by end tick
            return_payload = BikeReturnPayload(station_idx, trip.dest_station, 1)

            # durations from the csv file are in seconds; they were converted into minutes during pre-processing
            return_tick = evt.tick + trip.durations

            bike_return_evt = self._event_buffer.gen_atom_event(
                return_tick, CitiBikeEvents.ReturnBike, payload=return_payload)

            self._event_buffer.insert_event(bike_return_evt)

    def _on_bike_returned(self, evt: Event):
        """callback when a bike is returned to a station"""
        payload: BikeReturnPayload = evt.payload

        station: Station = self._stations[payload.to_station_idx]

        station_bikes = station.bikes
        return_number = payload.number

        empty_docks = station.capacity - station_bikes

        max_accept_number = min(empty_docks, return_number)

        if max_accept_number < return_number:
            src_station = self._stations[payload.from_station_idx]

            additional_bikes = return_number - max_accept_number

            station.failed_return += additional_bikes

            # we have to move the additional bikes to neighbors
            self._decision_strategy.move_to_neighbor(
                src_station, station, additional_bikes)

        station.bikes = station_bikes + max_accept_number

    def _on_rebalance_bikes(self, evt: Event):
        """callback to check if we should send decision events to the agent"""
        # get stations that need an action
        stations_need_decision = self._decision_strategy.get_stations_need_decision(evt.tick)

        if len(stations_need_decision) > 0:
            # generate a decision event
            for station_idx, decision_type in stations_need_decision:
                decision_payload = DecisionEvent(
                    station_idx, evt.tick, self.frame_index(evt.tick),
                    self._decision_strategy.action_scope, decision_type)

                decision_evt = self._event_buffer.gen_cascade_event(
                    evt.tick, DECISION_EVENT, decision_payload)

                self._event_buffer.insert_event(decision_evt)

    def _on_bike_deliver(self, evt: Event):
        """callback when our transferred bikes reach the destination"""
        payload: BikeTransferPayload = evt.payload

        station: Station = self._stations[payload.to_station_idx]

        station_bikes = station.bikes
        transferred_number = payload.number

        empty_docks = station.capacity - station_bikes

        max_accept_number = min(empty_docks, transferred_number)

        if max_accept_number < transferred_number:
            src_station = self._stations[payload.from_station_idx]

            self._decision_strategy.move_to_neighbor(
                src_station, station, transferred_number - max_accept_number)

        if max_accept_number > 0:
            station.transfer_cost += max_accept_number

        station.bikes = station_bikes + max_accept_number

    def _on_action_received(self, evt: Event):
        """callback when we get an action from the agent"""
        action: Action = None

        if evt is None or evt.payload is None:
            return

        for action in evt.payload:
            from_station_idx: int = action.from_station_idx
            to_station_idx: int = action.to_station_idx

            # ignore invalid cell idx
            if from_station_idx < 0 or to_station_idx < 0:
                continue

            station: Station = self._stations[from_station_idx]
            station_bikes = station.bikes

            executed_number = min(station_bikes, action.number)

            # insert into event buffer if we have bikes to transfer
            if executed_number > 0:
                station.bikes = station_bikes - executed_number

                payload = BikeTransferPayload(
                    from_station_idx, to_station_idx, executed_number)

                transfer_time = self._decision_strategy.transfer_time

                transfer_evt = self._event_buffer.gen_atom_event(
                    evt.tick + transfer_time, CitiBikeEvents.DeliverBike, payload)

                self._event_buffer.insert_event(transfer_evt)

    def _build_temp_data(self):
        logger.warning_yellow(
            f"Binary data files for scenario: citi_bike topology: {self._topology} not found.")

        citi_bike_process = CitiBikeProcess(is_temp=True)

        if self._topology in citi_bike_process.topologies:
            pid = str(os.getpid())

            logger.warning_yellow(
                f"Generating temp binary data file for scenario: citi_bike topology: {self._topology} pid: {pid}. "
                "If you want to keep the data, please use MARO CLI command "
                f"'maro data generate -s citi_bike -t {self._topology}' to generate the binary data files first.")

            self._citi_bike_data_pipeline = citi_bike_process.topologies[self._topology]
            self._citi_bike_data_pipeline.download()
            self._citi_bike_data_pipeline.clean()
            self._citi_bike_data_pipeline.build()

            build_folders = self._citi_bike_data_pipeline.get_build_folders()

            trip_folder = build_folders["trip"]
            weather_folder = build_folders["weather"]

            self._conf["weather_data"] = chagne_file_path(
                self._conf["weather_data"], weather_folder)
            self._conf["trip_data"] = chagne_file_path(
                self._conf["trip_data"], trip_folder)
            self._conf["stations_init_data"] = chagne_file_path(
                self._conf["stations_init_data"], trip_folder)
            self._conf["distance_adj_data"] = chagne_file_path(
                self._conf["distance_adj_data"], trip_folder)
        else:
            raise CommandError(
                "generate",
                f"Can not generate data files for scenario: citi_bike topology: {self._topology}")
class VmSchedulingBusinessEngine(AbsBusinessEngine):
    def __init__(self,
                 event_buffer: EventBuffer,
                 topology: str,
                 start_tick: int,
                 max_tick: int,
                 snapshot_resolution: int,
                 max_snapshots: int,
                 additional_options: dict = {}):
        super().__init__(scenario_name="vm_scheduling",
                         event_buffer=event_buffer,
                         topology=topology,
                         start_tick=start_tick,
                         max_tick=max_tick,
                         snapshot_resolution=snapshot_resolution,
                         max_snapshots=max_snapshots,
                         additional_options=additional_options)

        # Env metrics.
        self._total_vm_requests: int = 0
        self._total_energy_consumption: float = 0
        self._successful_allocation: int = 0
        self._successful_completion: int = 0
        self._failed_allocation: int = 0
        self._total_latency: Latency = Latency()
        self._total_oversubscriptions: int = 0

        # Load configurations.
        self._load_configs()
        self._register_events()

        self._init_frame()
        self._init_data()
        # PM list used for quick access.
        self._init_pms()

        # All living VMs.
        self._live_vms: Dict[int, VirtualMachine] = {}
        # All request payloads of the VMs pending a decision.
        # NOTE: Need naming suggestion.
        self._pending_vm_request_payload: Dict[int, VmRequestPayload] = {}

        self._vm_reader = BinaryReader(self._config.VM_TABLE)
        self._vm_item_picker = self._vm_reader.items_tick_picker(
            self._start_tick, self._max_tick, time_unit="s")

        self._cpu_reader = CpuReader(data_path=self._config.CPU_READINGS, start_tick=self._start_tick)

        self._tick: int = 0
        self._pending_action_vm_id: int = 0

    @property
    def configs(self) -> dict:
        """dict: Current configuration."""
        return self._config

    @property
    def frame(self) -> FrameBase:
        """FrameBase: Current frame."""
        return self._frame

    @property
    def snapshots(self) -> SnapshotList:
        """SnapshotList: Current snapshot list."""
        return self._snapshots

    def _load_configs(self):
        """Load configurations."""
        # Update self._config_path with the current file path.
        self.update_config_root_path(__file__)
        with open(os.path.join(self._config_path, "config.yml")) as fp:
            self._config = convert_dottable(safe_load(fp))

        self._delay_duration: int = self._config.DELAY_DURATION
        self._buffer_time_budget: int = self._config.BUFFER_TIME_BUDGET
        self._pm_amount: int = self._config.PM.AMOUNT

    def _init_data(self):
        """If the data files do not exist, trigger the short data pipeline to download the processed data."""
        vm_table_data_path = self._config.VM_TABLE
        if vm_table_data_path.startswith("~"):
            vm_table_data_path = os.path.expanduser(vm_table_data_path)

        cpu_readings_data_path = self._config.CPU_READINGS
        if cpu_readings_data_path.startswith("~"):
            cpu_readings_data_path = os.path.expanduser(cpu_readings_data_path)

        if (not os.path.exists(vm_table_data_path)) or (not os.path.exists(cpu_readings_data_path)):
            self._download_processed_data()
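    # The configuration keys this engine reads (DELAY_DURATION, BUFFER_TIME_BUDGET,
    # VM_TABLE, CPU_READINGS, PROCESSED_DATA_URL, and the PM section) imply a
    # config.yml of roughly this shape. The values below are illustrative
    # placeholders, not the shipped defaults:
    #
    #     DELAY_DURATION: 1
    #     BUFFER_TIME_BUDGET: 10
    #     VM_TABLE: "~/.maro/data/vm_scheduling/.build/.../vm_table.bin"
    #     CPU_READINGS: "~/.maro/data/vm_scheduling/.build/.../cpu_readings.bin"
    #     PROCESSED_DATA_URL: "https://.../vm_data.tar.gz"
    #     PM:
    #       AMOUNT: 100
    #       CPU: 32
    #       MEMORY: 128
    #       POWER_CURVE:
    #         CALIBRATION_PARAMETER: 1.4
    #         BUSY_POWER: 200
    #         IDLE_POWER: 100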
    def _init_pms(self):
        """Initialize the physical machines based on the config settings. PM ids start from 0."""
        self._pm_cpu_cores_capacity: int = self._config.PM.CPU
        self._pm_memory_capacity: int = self._config.PM.MEMORY

        # TODO: Improve the scalability, e.g. by supporting multiple PM sets.
        self._machines = self._frame.pms
        for pm_id in range(self._pm_amount):
            pm = self._machines[pm_id]
            pm.set_init_state(id=pm_id,
                              cpu_cores_capacity=self._pm_cpu_cores_capacity,
                              memory_capacity=self._pm_memory_capacity)

    def reset(self):
        """Reset internal states for an episode."""
        self._total_energy_consumption: float = 0.0
        self._successful_allocation: int = 0
        self._successful_completion: int = 0
        self._failed_allocation: int = 0
        self._total_latency: Latency = Latency()
        self._total_oversubscriptions: int = 0

        self._frame.reset()
        self._snapshots.reset()

        for pm in self._machines:
            pm.reset()

        self._live_vms.clear()
        self._pending_vm_request_payload.clear()

        self._vm_reader.reset()
        self._vm_item_picker = self._vm_reader.items_tick_picker(
            self._start_tick, self._max_tick, time_unit="s")

        self._cpu_reader.reset()

    def _init_frame(self):
        self._frame = build_frame(self._pm_amount, self.calc_max_snapshots())
        self._snapshots = self._frame.snapshots

    def step(self, tick: int):
        """Push business to the next step.

        Args:
            tick (int): Current tick to process.
        """
        self._tick = tick
        # All VMs' CPU utilization at the current tick.
        cur_tick_cpu_utilization = self._cpu_reader.items(tick=tick)

        # Process finished VMs.
        self._process_finished_vm()
        # Update all live VMs' CPU utilization.
        self._update_vm_workload(cur_tick_cpu_utilization=cur_tick_cpu_utilization)
        # Update all PMs' CPU utilization.
        self._update_pm_workload()

        for vm in self._vm_item_picker.items(tick):
            # TODO: Calculate
            vm_info = VirtualMachine(id=vm.vm_id,
                                     cpu_cores_requirement=vm.vm_cpu_cores,
                                     memory_requirement=vm.vm_memory,
                                     lifetime=vm.vm_deleted - vm.timestamp + 1)

            if vm.vm_id not in cur_tick_cpu_utilization:
                raise Exception(f"The VM id: '{vm.vm_id}' does not exist at this tick.")

            vm_info.add_utilization(cpu_utilization=cur_tick_cpu_utilization[vm.vm_id])
            vm_req_payload: VmRequestPayload = VmRequestPayload(
                vm_info=vm_info, remaining_buffer_time=self._buffer_time_budget)
            vm_request_event = self._event_buffer.gen_cascade_event(
                tick=tick, event_type=Events.REQUEST, payload=vm_req_payload)
            self._event_buffer.insert_event(event=vm_request_event)
            self._total_vm_requests += 1

    def post_step(self, tick: int):
        # Add this tick's energy consumption to the environment metrics.
        total_energy: float = 0.0
        for pm in self._machines:
            total_energy += pm.energy_consumption
        self._total_energy_consumption += total_energy

        if (tick + 1) % self._snapshot_resolution == 0:
            # NOTE: We should use the frame_index method to get the correct index in the snapshot list.
            self._frame.take_snapshot(self.frame_index(tick))

        # Stop the current episode if we reach the max tick.
        return tick + 1 >= self._max_tick

    def get_event_payload_detail(self) -> dict:
        """dict: Event payload details of the current scenario."""
        return {
            Events.REQUEST.name: VmRequestPayload.summary_key,
            MaroEvents.PENDING_DECISION.name: DecisionPayload.summary_key
        }

    def get_agent_idx_list(self) -> List[int]:
        """Get a list of agent indices."""
        pass

    def get_node_mapping(self) -> dict:
        """dict: Node mapping."""
        node_mapping = {}
        return node_mapping

    def get_vm_cpu_utilization_series(self, vm_id: int) -> List[float]:
        """Get the CPU utilization series of a specific VM by its id."""
        if vm_id in self._live_vms:
            return self._live_vms[vm_id].get_historical_utilization_series(cur_tick=self._tick)

        return []
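    # A small worked example of the snapshot condition in post_step above,
    # assuming snapshot_resolution = 3: (tick + 1) % 3 == 0 holds at ticks
    # 2, 5, 8, ..., so one snapshot is taken per 3-tick window, and
    # frame_index(tick) maps those ticks to snapshot indices 0, 1, 2, ...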
""" return DocableDict( metrics_desc, total_vm_requests=self._total_vm_requests, total_energy_consumption=self._total_energy_consumption, successful_allocation=self._successful_allocation, successful_completion=self._successful_completion, failed_allocation=self._failed_allocation, total_latency=self._total_latency, total_oversubscriptions=self._total_oversubscriptions) def _register_events(self): # Register our own events and their callback handlers. self._event_buffer.register_event_handler(event_type=Events.REQUEST, handler=self._on_vm_required) # Generate decision event. self._event_buffer.register_event_handler( event_type=MaroEvents.TAKE_ACTION, handler=self._on_action_received) def _update_vm_workload(self, cur_tick_cpu_utilization: dict): """Update all live VMs CPU utilization. The length of VMs utilization series could be difference among all VMs, because index 0 represents the VM's CPU utilization at the tick it starts. """ for live_vm in self._live_vms.values(): # NOTE: Some data could be lost. We use -1.0 to represent the missing data. if live_vm.id not in cur_tick_cpu_utilization: live_vm.add_utilization(cpu_utilization=-1.0) else: live_vm.add_utilization( cpu_utilization=cur_tick_cpu_utilization[live_vm.id]) live_vm.cpu_utilization = live_vm.get_utilization( cur_tick=self._tick) for pending_vm_payload in self._pending_vm_request_payload.values(): pending_vm = pending_vm_payload.vm_info if pending_vm.id not in cur_tick_cpu_utilization: pending_vm.add_utilization(cpu_utilization=-1.0) else: pending_vm.add_utilization( cpu_utilization=cur_tick_cpu_utilization[pending_vm.id]) def _update_pm_workload(self): """Update CPU utilization occupied by total VMs on each PM.""" for pm in self._machines: total_pm_cpu_cores_used: float = 0.0 for vm_id in pm.live_vms: vm = self._live_vms[vm_id] total_pm_cpu_cores_used += vm.cpu_utilization * vm.cpu_cores_requirement pm.update_cpu_utilization(vm=None, cpu_utilization=total_pm_cpu_cores_used / pm.cpu_cores_capacity) pm.energy_consumption = self._cpu_utilization_to_energy_consumption( cpu_utilization=pm.cpu_utilization) def _cpu_utilization_to_energy_consumption( self, cpu_utilization: float) -> float: """Convert the CPU utilization to energy consumption. The formulation refers to https://dl.acm.org/doi/epdf/10.1145/1273440.1250665 """ power: float = self._config.PM.POWER_CURVE.CALIBRATION_PARAMETER busy_power = self._config.PM.POWER_CURVE.BUSY_POWER idle_power = self._config.PM.POWER_CURVE.IDLE_POWER cpu_utilization /= 100 return idle_power + (busy_power - idle_power) * ( 2 * cpu_utilization - pow(cpu_utilization, power)) def _postpone_vm_request(self, postpone_type: PostponeType, vm_id: int, remaining_buffer_time: int): """Postpone VM request.""" if remaining_buffer_time >= self._delay_duration: if postpone_type == PostponeType.Resource: self._total_latency.due_to_resource += self._delay_duration elif postpone_type == PostponeType.Agent: self._total_latency.due_to_agent += self._delay_duration postpone_payload = self._pending_vm_request_payload[vm_id] postpone_payload.remaining_buffer_time -= self._delay_duration postpone_event = self._event_buffer.gen_cascade_event( tick=self._tick + self._delay_duration, event_type=Events.REQUEST, payload=postpone_payload) self._event_buffer.insert_event(event=postpone_event) else: # Fail # Pop out VM request payload. self._pending_vm_request_payload.pop(vm_id) # Add failed allocation. 
    def _postpone_vm_request(self, postpone_type: PostponeType, vm_id: int, remaining_buffer_time: int):
        """Postpone a VM request."""
        if remaining_buffer_time >= self._delay_duration:
            if postpone_type == PostponeType.Resource:
                self._total_latency.due_to_resource += self._delay_duration
            elif postpone_type == PostponeType.Agent:
                self._total_latency.due_to_agent += self._delay_duration

            postpone_payload = self._pending_vm_request_payload[vm_id]
            postpone_payload.remaining_buffer_time -= self._delay_duration
            postpone_event = self._event_buffer.gen_cascade_event(
                tick=self._tick + self._delay_duration,
                event_type=Events.REQUEST,
                payload=postpone_payload)
            self._event_buffer.insert_event(event=postpone_event)
        else:
            # Fail: pop out the VM request payload and count the failed allocation.
            self._pending_vm_request_payload.pop(vm_id)
            self._failed_allocation += 1

    def _get_valid_pms(self, vm_cpu_cores_requirement: int, vm_memory_requirement: int) -> List[int]:
        """Check all valid PMs.

        Args:
            vm_cpu_cores_requirement (int): The CPU cores requested by the VM.
            vm_memory_requirement (int): The memory requested by the VM.
        """
        # NOTE: Should we implement this logic inside the action scope?
        # TODO: In the oversubscribable scenario, we should consider more situations,
        # such as the PM type (oversubscribable and non-oversubscribable).
        valid_pm_list = []
        for pm in self._machines:
            if (pm.cpu_cores_capacity - pm.cpu_cores_allocated >= vm_cpu_cores_requirement
                    and pm.memory_capacity - pm.memory_allocated >= vm_memory_requirement):
                valid_pm_list.append(pm.id)
        return valid_pm_list

    def _process_finished_vm(self):
        """Release PM resources occupied by finished VMs."""
        # Get the VM info.
        vm_id_list = []
        for vm in self._live_vms.values():
            if vm.deletion_tick == self._tick:
                # Release PM resources.
                pm: PhysicalMachine = self._machines[vm.pm_id]
                pm.cpu_cores_allocated -= vm.cpu_cores_requirement
                pm.memory_allocated -= vm.memory_requirement
                pm.deallocate_vms(vm_ids=[vm.id])
                vm_id_list.append(vm.id)
                # The VM's task completed successfully.
                self._successful_completion += 1

        # Remove dead VMs.
        for vm_id in vm_id_list:
            self._live_vms.pop(vm_id)

    def _on_vm_required(self, vm_request_event: CascadeEvent):
        """Callback when a VM request is generated."""
        # Get VM data from the payload.
        payload: VmRequestPayload = vm_request_event.payload

        vm_info: VirtualMachine = payload.vm_info
        remaining_buffer_time: int = payload.remaining_buffer_time
        # Store the payload inside the business engine.
        self._pending_vm_request_payload[vm_info.id] = payload

        # Get the valid PM list.
        valid_pm_list = self._get_valid_pms(
            vm_cpu_cores_requirement=vm_info.cpu_cores_requirement,
            vm_memory_requirement=vm_info.memory_requirement)

        if len(valid_pm_list) > 0:
            # Generate a pending decision.
            decision_payload = DecisionPayload(
                valid_pms=valid_pm_list,
                vm_id=vm_info.id,
                vm_cpu_cores_requirement=vm_info.cpu_cores_requirement,
                vm_memory_requirement=vm_info.memory_requirement,
                remaining_buffer_time=remaining_buffer_time)
            self._pending_action_vm_id = vm_info.id
            pending_decision_event = self._event_buffer.gen_decision_event(
                tick=vm_request_event.tick, payload=decision_payload)
            vm_request_event.add_immediate_event(event=pending_decision_event)
        else:
            # Either postpone the request event or fail.
            self._postpone_vm_request(
                postpone_type=PostponeType.Resource,
                vm_id=vm_info.id,
                remaining_buffer_time=remaining_buffer_time)
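    # To make the postpone bookkeeping above concrete: with hypothetical
    # DELAY_DURATION = 1 and BUFFER_TIME_BUDGET = 2, a request that keeps
    # getting postponed is re-inserted at tick + 1 with its remaining buffer
    # time going 2 -> 1 -> 0; once remaining_buffer_time < DELAY_DURATION,
    # the request is dropped and counted in _failed_allocation.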
    def _on_action_received(self, event: CascadeEvent):
        """Callback when we get an action from the agent."""
        action = None
        if event is None or event.payload is None:
            self._pending_vm_request_payload.pop(self._pending_action_vm_id)
            return

        cur_tick: int = event.tick

        for action in event.payload:
            vm_id: int = action.vm_id

            if vm_id not in self._pending_vm_request_payload:
                raise Exception(f"The VM id: '{vm_id}' sent by the agent is invalid.")

            if type(action) == AllocateAction:
                pm_id = action.pm_id
                vm: VirtualMachine = self._pending_vm_request_payload[vm_id].vm_info
                lifetime = vm.lifetime

                # Update VM information.
                vm.pm_id = pm_id
                vm.creation_tick = cur_tick
                vm.deletion_tick = cur_tick + lifetime
                vm.cpu_utilization = vm.get_utilization(cur_tick=cur_tick)

                # Pop the VM out of the pending requests and add it to the live VM dict.
                self._pending_vm_request_payload.pop(vm_id)
                self._live_vms[vm_id] = vm

                # TODO: The current logic cannot fulfill the oversubscription case.

                # Update PM resources requested by the VM.
                pm = self._machines[pm_id]
                pm.allocate_vms(vm_ids=[vm.id])
                pm.cpu_cores_allocated += vm.cpu_cores_requirement
                pm.memory_allocated += vm.memory_requirement
                pm.update_cpu_utilization(vm=vm, cpu_utilization=None)
                pm.energy_consumption = self._cpu_utilization_to_energy_consumption(
                    cpu_utilization=pm.cpu_utilization)
                self._successful_allocation += 1
            elif type(action) == PostponeAction:
                postpone_step = action.postpone_step
                remaining_buffer_time = self._pending_vm_request_payload[vm_id].remaining_buffer_time
                # Either postpone the request event or fail.
                self._postpone_vm_request(
                    postpone_type=PostponeType.Agent,
                    vm_id=vm_id,
                    remaining_buffer_time=remaining_buffer_time - postpone_step * self._delay_duration)

    def _download_processed_data(self):
        """Download and build the processed data."""
        data_root = StaticParameter.data_root
        build_folder = os.path.join(data_root, self._scenario_name, ".build", self._topology)

        source = self._config.PROCESSED_DATA_URL
        download_file_name = source.split('/')[-1]
        download_file_path = os.path.join(build_folder, download_file_name)

        # Download the file from Azure blob storage.
        if not os.path.exists(download_file_path):
            logger.info_green(f"Downloading data from {source} to {download_file_path}.")
            download_file(source=source, destination=download_file_path)
        else:
            logger.info_green("File already exists, skipping download.")

        logger.info_green(f"Unzip {download_file_path} to {build_folder}")
        # Unzip files.
        tar = tarfile.open(download_file_path, "r:gz")
        tar.extractall(path=build_folder)
        tar.close()

        # Move the files to the correct path.
        unzip_file = os.path.join(build_folder, "build")
        file_names = os.listdir(unzip_file)
        for file_name in file_names:
            shutil.move(os.path.join(unzip_file, file_name), build_folder)

        os.rmdir(unzip_file)
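# A minimal, hypothetical driver for the engine above via MARO's Env API.
# The topology name "azure.2019.10k" and the durations value are illustrative
# assumptions; AllocateAction and PostponeAction are the action types consumed
# by _on_action_received above.
if __name__ == "__main__":
    from maro.simulator import Env
    from maro.simulator.scenarios.vm_scheduling import AllocateAction, PostponeAction

    env = Env(scenario="vm_scheduling", topology="azure.2019.10k",
              start_tick=0, durations=8638, snapshot_resolution=1)

    metrics, decision_payload, is_done = env.step(None)
    while not is_done:
        # Trivial first-fit policy: allocate to the first valid PM, or
        # postpone by one step if no PM can host the VM.
        if len(decision_payload.valid_pms) > 0:
            action = AllocateAction(vm_id=decision_payload.vm_id,
                                    pm_id=decision_payload.valid_pms[0])
        else:
            action = PostponeAction(vm_id=decision_payload.vm_id, postpone_step=1)
        metrics, decision_payload, is_done = env.step(action)

    print(metrics)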
class EventBindBinaryReader:
    """Binary reader that generates the events defined in the meta and inserts them into the event buffer.

    Items that do not match any event type are bound to a predefined event, UNPROECESSED_EVENT;
    you can handle these by registering an event handler for it.

    Examples:

        .. code-block:: python

            class MyEvents(Enum):
                Event1 = 'event1'
                Event2 = 'event2'

            event_buffer = EventBuffer()

            # Handle events we defined.
            event_buffer.register_event_handler(MyEvents.Event1, on_event1_occur)
            event_buffer.register_event_handler(MyEvents.Event2, on_event2_occur)

            # Handle items that cannot be mapped to an event.
            event_buffer.register_event_handler(UNPROECESSED_EVENT, on_unprocessed_item)

            # Create a reader within ticks (0, 1000); events will be mapped to the MyEvents type.
            reader = EventBindBinaryReader(MyEvents, event_buffer, path_to_bin, 0, 1000)

            # Read and generate events at tick 0.
            reader.read_items(0)

            def on_event1_occur(evt: Event):
                pass

            def on_event2_occur(evt: Event):
                pass

            def on_unprocessed_item(evt: Event):
                pass

    Args:
        event_cls (type): Event class that items will be mapped to.
        event_buffer (EventBuffer): Event buffer used to generate and insert events.
        binary_file_path (str): Path to the binary file to read.
        start_tick (int): Start tick to filter, default is 0.
        end_tick (int): End tick to filter, default is 100.
        time_unit (str): Unit of a tick; available units are "d", "h", "m", "s".
            Different units affect the reading result.
        buffer_size (int): In-memory buffer size.
        enable_value_adjust (bool): Whether the reader should adjust the value of fields
            that are marked as adjustable.
    """

    def __init__(self, event_cls: type, event_buffer: EventBuffer, binary_file_path: str,
                 start_tick: int = 0, end_tick: int = 100, time_unit: str = "s",
                 buffer_size: int = 100, enable_value_adjust: bool = False):
        self._reader = BinaryReader(file_path=binary_file_path,
                                    enable_value_adjust=enable_value_adjust,
                                    buffer_size=buffer_size)

        self._event_buffer = event_buffer
        self._start_tick = start_tick
        self._end_tick = end_tick
        self._time_unit = time_unit
        self._event_cls = event_cls

        self._picker = self._reader.items_tick_picker(start_time_offset=self._start_tick,
                                                      end_time_offset=self._end_tick,
                                                      time_unit=self._time_unit)

        self._init_meta()

    @property
    def start_datetime(self) -> datetime:
        """datetime: Start datetime of this binary file."""
        return self._reader.start_datetime

    @property
    def end_datetime(self) -> datetime:
        """datetime: End datetime of this binary file."""
        return self._reader.end_datetime

    @property
    def header(self) -> tuple:
        """tuple: Header in the binary file."""
        return self._reader.header
""" if self._picker: for item in self._picker.items(tick): self._gen_event_by_item(item, tick) return None def reset(self): """Reset states of reader.""" self._reader.reset() self._picker = self._reader.items_tick_picker( start_time_offset=self._start_tick, end_time_offset=self._end_tick, time_unit=self._time_unit) def _gen_event_by_item(self, item, tick): event_name = None if self._event_field_name is None and self._default_event is not None: # used default event name to gen event event_name = self._event_cls(self._default_event) elif self._event_field_name is not None: val = getattr(item, self._event_field_name, None) event_name = self._event_cls( self._events.get(val, self._default_event)) else: event_name = UNPROECESSED_EVENT evt = self._event_buffer.gen_atom_event(tick, event_name, payload=item) self._event_buffer.insert_event(evt) def _init_meta(self): meta = self._reader.meta # default event display name self._default_event = None # value -> display name self._events = {} for event in meta.events: self._events[event.value] = event.display_name if meta.default_event_name == event.type_name: # match, get save the display name self._default_event = event.display_name self._event_field_name = meta.event_attr_name