def test_bounds_checking(self):
    """Check that the desired replica count respects min/max replicas."""
    lower_bound, upper_bound = 9, 11
    replica_count = 10
    config = AutoscalingConfig(
        max_replicas=upper_bound,
        min_replicas=lower_bound,
        target_num_ongoing_requests_per_replica=100,
    )

    # 150 ongoing requests per replica against a target of 100: the test
    # expects the result to land exactly on max_replicas.
    overloaded = calculate_desired_num_replicas(
        autoscaling_config=config,
        current_num_ongoing_requests=[150] * replica_count,
    )
    assert overloaded == upper_bound

    # 50 ongoing requests per replica against a target of 100: the test
    # expects the result to land exactly on min_replicas.
    underloaded = calculate_desired_num_replicas(
        autoscaling_config=config,
        current_num_ongoing_requests=[50] * replica_count,
    )
    assert underloaded == lower_bound

    # Every per-replica load in [50, 150) must stay inside the bounds.
    for per_replica_load in range(50, 150):
        desired = calculate_desired_num_replicas(
            autoscaling_config=config,
            current_num_ongoing_requests=[per_replica_load] * replica_count,
        )
        assert lower_bound <= desired <= upper_bound
def test_smoothing_factor(self):
    """Check the desired replica count when smoothing_factor is 0.5."""
    config = AutoscalingConfig(
        min_replicas=0,
        max_replicas=100,
        target_num_ongoing_requests_per_replica=1,
        smoothing_factor=0.5,
    )
    replica_count = 10

    # (ongoing requests per replica, lowest accepted, highest accepted)
    cases = (
        (4.0, 24, 26),  # 10 + 0.5 * (40 - 10) = 25
        (0.25, 5, 8),  # 10 + 0.5 * (2.5 - 10) = 6.25
    )
    for per_replica_load, lowest, highest in cases:
        desired = calculate_desired_num_replicas(
            autoscaling_config=config,
            current_num_ongoing_requests=[per_replica_load] * replica_count,
        )
        assert lowest <= desired <= highest
def test_scale_down(self):
    """Check that a load of half the per-replica target scales down."""
    replica_count = 10
    config = AutoscalingConfig(
        min_replicas=0,
        max_replicas=100,
        target_num_ongoing_requests_per_replica=1,
    )

    desired = calculate_desired_num_replicas(
        autoscaling_config=config,
        current_num_ongoing_requests=[0.5] * replica_count,
    )

    # 10 * 0.5 = 5
    assert 4 <= desired <= 6
def test_scale_up(self):
    """Check that a load of twice the per-replica target scales up."""
    replica_count = 10
    config = AutoscalingConfig(
        min_replicas=0,
        max_replicas=100,
        target_num_ongoing_requests_per_replica=1,
    )

    desired = calculate_desired_num_replicas(
        autoscaling_config=config,
        current_num_ongoing_requests=[2.0] * replica_count,
    )

    # 10 * 2 = 20
    assert 19 <= desired <= 21
def autoscale(self) -> None:
    """Redeploy every autoscaling deployment with a recomputed num_replicas.

    For each deployment returned by ``self.list_deployments()`` that has an
    ``autoscaling_config``, gather the window-averaged ongoing-request metric
    of each RUNNING replica. Deployments without an autoscaling config, or
    for which no replica reported a metric, are left untouched. Otherwise a
    copy of the backend config gets its ``num_replicas`` set from
    ``calculate_desired_num_replicas`` and is passed to ``self.deploy``.
    """
    for name, (info, route_prefix) in self.list_deployments().items():
        current_config = info.backend_config
        autoscaling_config = current_config.autoscaling_config
        if autoscaling_config is None:
            # This deployment does not autoscale.
            continue

        backend_state = self.backend_state_manager._backend_states[name]
        running = backend_state._replicas.get([ReplicaState.RUNNING])

        # One metric per running replica. The second argument is presumably
        # the start of the look-back window -- TODO confirm against
        # window_average.
        window_averages = (
            self.autoscaling_metrics_store.window_average(
                replica.replica_tag,
                time.time() - autoscaling_config.look_back_period_s,
            )
            for replica in running
        )
        # Replicas for which window_average returned None are ignored.
        ongoing_requests = [
            average for average in window_averages if average is not None
        ]
        if not ongoing_requests:
            # No metrics to base a scaling decision on.
            continue

        scaled_config = current_config.copy()
        scaled_config.num_replicas = calculate_desired_num_replicas(
            autoscaling_config, ongoing_requests)

        # The current version is passed as both version and prev_version.
        replica_config = info.replica_config
        deployer_job_id = info.deployer_job_id
        _goal_id, _updating = self.deploy(
            name,
            scaled_config.to_proto_bytes(),
            replica_config,
            version=info.version,
            prev_version=info.version,
            route_prefix=route_prefix,
            deployer_job_id=deployer_job_id,
        )