def get_tf_gpu_deployment(fet_oracle: FetOracle, resource_oracle: ResourceOracle,
                          ranking: DeploymentRanking) -> FunctionDeployment:
    """Build the TensorFlow-GPU deployment using the oracle-based API.

    Fix: adds the ``-> FunctionDeployment`` return annotation that every
    sibling oracle-based builder in this file already carries.

    Args:
        fet_oracle: oracle for function execution time characterization.
        resource_oracle: oracle for resource usage characterization.
        ranking: preference order over the deployment's images.

    Returns:
        The configured FunctionDeployment for the single GPU image.
    """
    tf_gpu_characterization = FunctionCharacterization(
        images.tf_gpu_manifest, fet_oracle, resource_oracle)
    tf_gpu_function = FunctionDefinition(
        name=images.tf_gpu_function,
        image=images.tf_gpu_manifest,
        characterization=tf_gpu_characterization,
        labels={
            'watchdog': 'http',
            'workers': '4',
            'cluster': '3',
            'device.edgerun.io/accelerator': 'GPU',
            'device.edgerun.io/vram': '2000',
        })
    tf_gpu_function.requests = Resources.from_str("300Mi", "1000m")
    deployment = FunctionDeployment(
        images.tf_gpu_function,
        {
            images.tf_gpu_manifest: tf_gpu_function,
        },
        ranking
    )
    # Single image, so the scaling factor map has one entry.
    deployment.function_factor = {
        images.tf_gpu_manifest: 1
    }
    return deployment
def get_fio_deployment(fet_oracle: FetOracle, resource_oracle: ResourceOracle,
                       ranking: DeploymentRanking) -> FunctionDeployment:
    """Assemble the fio benchmark deployment from its single CPU image."""
    characterization = FunctionCharacterization(
        images.fio_manifest, fet_oracle, resource_oracle)

    fio_fn = FunctionDefinition(
        name=images.fio_function,
        image=images.fio_manifest,
        characterization=characterization,
        labels={'watchdog': 'http', 'workers': '4', 'cluster': '1'})
    fio_fn.requests = Resources.from_str("200Mi", "1000m")

    fio_deployment = FunctionDeployment(
        images.fio_function,
        {images.fio_manifest: fio_fn},
        ranking,
    )
    fio_deployment.function_factor = {images.fio_manifest: 1}
    return fio_deployment
def get_speech_inference_deployment(fet_oracle: FetOracle, resource_oracle: ResourceOracle,
                                    ranking: DeploymentRanking) -> FunctionDeployment:
    """Assemble the speech-inference deployment with a GPU and a tflite image."""
    gpu_characterization = FunctionCharacterization(
        images.speech_inference_gpu_manifest, fet_oracle, resource_oracle)
    tflite_characterization = FunctionCharacterization(
        images.speech_inference_tflite_manifest, fet_oracle, resource_oracle)

    tflite_item = storage.speech_model_tflite_bucket_item
    tflite_storage_labels = {
        'data.skippy.io/receives-from-storage': '48M',
        'data.skippy.io/receives-from-storage/path': f'{storage.speech_bucket}/{tflite_item.name}',
    }
    gpu_item = storage.speech_model_gpu_bucket_item
    # Size excludes the scorer object (~900M), which is used to improve accuracy
    # but made no runtime difference in simple bash benchmarks.
    gpu_storage_labels = {
        'data.skippy.io/receives-from-storage': '188M',
        'data.skippy.io/receives-from-storage/path': f'{storage.speech_bucket}/{gpu_item.name}',
    }

    gpu_fn = FunctionDefinition(
        name=images.speech_inference_function,
        image=images.speech_inference_gpu_manifest,
        characterization=gpu_characterization,
        labels={
            'watchdog': 'http',
            'workers': '4',
            'cluster': '0',
            'device.edgerun.io/accelerator': 'GPU',
            'device.edgerun.io/vram': '1500',
        })
    gpu_fn.labels.update(gpu_storage_labels)
    gpu_fn.requests = Resources.from_str("300Mi", "1000m")

    tflite_fn = FunctionDefinition(
        name=images.speech_inference_function,
        image=images.speech_inference_tflite_manifest,
        characterization=tflite_characterization,
        labels={'watchdog': 'http', 'workers': '4', 'cluster': '1'})
    tflite_fn.labels.update(tflite_storage_labels)
    tflite_fn.requests = Resources.from_str("100Mi", "1000m")

    deployment = FunctionDeployment(
        images.speech_inference_function,
        {
            images.speech_inference_gpu_manifest: gpu_fn,
            images.speech_inference_tflite_manifest: tflite_fn,
        },
        ranking,
    )
    deployment.function_factor = {
        images.speech_inference_tflite_manifest: 1,
        images.speech_inference_gpu_manifest: 1,
    }
    return deployment
def get_mobilenet_inference_deployment(fet_oracle: FetOracle, resource_oracle: ResourceOracle,
                                       ranking: DeploymentRanking) -> FunctionDeployment:
    """Assemble the mobilenet-inference deployment with a TPU and a tflite image."""
    tflite_characterization = FunctionCharacterization(
        images.mobilenet_inference_tflite_manifest, fet_oracle, resource_oracle)
    tpu_characterization = FunctionCharacterization(
        images.mobilenet_inference_tpu_manifest, fet_oracle, resource_oracle)

    tflite_model = storage.mobilenet_model_tflite_bucket_item.name
    tflite_storage_labels = {
        'data.skippy.io/receives-from-storage': '4M',
        'data.skippy.io/receives-from-storage/path': f'{storage.mobilenet_bucket}/{tflite_model}',
    }
    tpu_model = storage.mobilenet_model_tpu_bucket_item.name
    tpu_storage_labels = {
        'data.skippy.io/receives-from-storage': '4M',
        'data.skippy.io/receives-from-storage/path': f'{storage.mobilenet_bucket}/{tpu_model}',
    }

    tpu_fn = FunctionDefinition(
        name=images.mobilenet_inference_function,
        image=images.mobilenet_inference_tpu_manifest,
        characterization=tpu_characterization,
        labels={'watchdog': 'http', 'workers': '4', 'cluster': '1b',
                'device.edgerun.io/accelerator': 'TPU'})
    tpu_fn.requests = Resources.from_str("100Mi", "1000m")
    tpu_fn.labels.update(tpu_storage_labels)

    tflite_fn = FunctionDefinition(
        name=images.mobilenet_inference_function,
        image=images.mobilenet_inference_tflite_manifest,
        characterization=tflite_characterization,
        labels={'watchdog': 'http', 'workers': '4', 'cluster': '1a'})
    tflite_fn.requests = Resources.from_str("100Mi", "1000m")
    tflite_fn.labels.update(tflite_storage_labels)

    deployment = FunctionDeployment(
        images.mobilenet_inference_function,
        {
            images.mobilenet_inference_tpu_manifest: tpu_fn,
            images.mobilenet_inference_tflite_manifest: tflite_fn,
        },
        ranking,
    )
    deployment.function_factor = {
        images.mobilenet_inference_tpu_manifest: 1,
        images.mobilenet_inference_tflite_manifest: 1,
    }
    return deployment
def get_resnet_training_deployment(fet_oracle: FetOracle, resource_oracle: ResourceOracle,
                                   ranking: DeploymentRanking) -> FunctionDeployment:
    """Assemble the resnet50-training deployment with a GPU and a CPU image."""
    gpu_characterization = FunctionCharacterization(
        images.resnet50_training_gpu_manifest, fet_oracle, resource_oracle)
    cpu_characterization = FunctionCharacterization(
        images.resnet50_training_cpu_manifest, fet_oracle, resource_oracle)

    train_item = storage.resnet_train_bucket_item.name
    # Training both reads the input data and writes the updated model back.
    storage_labels = {
        'data.skippy.io/receives-from-storage': '58M',
        'data.skippy.io/sends-to-storage': '103M',
        'data.skippy.io/receives-from-storage/path': f'{storage.resnet_train_bucket}/{train_item}',
        'data.skippy.io/sends-to-storage/path': f'{storage.resnet_train_bucket}/updated_model'
    }

    gpu_fn = FunctionDefinition(
        name=images.resnet50_training_function,
        image=images.resnet50_training_gpu_manifest,
        characterization=gpu_characterization,
        labels={
            'watchdog': 'http',
            'workers': '4',
            'cluster': '2',
            'device.edgerun.io/accelerator': 'GPU',
            'device.edgerun.io/vram': '2000',
        })
    gpu_fn.labels.update(storage_labels)
    gpu_fn.requests = Resources.from_str("800Mi", "1000m")

    cpu_fn = FunctionDefinition(
        name=images.resnet50_training_function,
        image=images.resnet50_training_cpu_manifest,
        characterization=cpu_characterization,
        labels={'watchdog': 'http', 'workers': '4', 'cluster': '2'})
    cpu_fn.labels.update(storage_labels)
    cpu_fn.requests = Resources.from_str("1Gi", "1000m")

    deployment = FunctionDeployment(
        images.resnet50_training_function,
        {
            images.resnet50_training_gpu_manifest: gpu_fn,
            images.resnet50_training_cpu_manifest: cpu_fn,
        },
        ranking,
    )
    deployment.function_factor = {
        images.resnet50_training_gpu_manifest: 1,
        images.resnet50_training_cpu_manifest: 1,
    }
    return deployment
def get_tf_gpu_deployment(ranking: DeploymentRanking,
                          scaling_config: ScalingConfiguration = None) -> FunctionDeployment:
    """Build the TensorFlow-GPU deployment using the container-based API.

    Fix: adds the ``-> FunctionDeployment`` return annotation that the other
    container-based builders in this file already carry.

    NOTE(review): an oracle-based builder with the same name appears earlier
    in this file — confirm only one of the two is meant to be in scope.

    Args:
        ranking: preference order over the deployment's images.
        scaling_config: optional scaling settings; a fresh
            ScalingConfiguration is used when None.

    Returns:
        The configured FunctionDeployment for the single GPU image.
    """
    # Design Time
    tf_gpu_function_image = FunctionImage(image=images.tf_gpu_manifest)
    tf_gpu_function = Function(name=images.tf_gpu_function,
                               fn_images=[tf_gpu_function_image])

    # Run time
    tf_gpu_function_requests = KubernetesResourceConfiguration.create_from_str(
        cpu="1000m", memory="300Mi")
    tf_gpu_function_container = FunctionContainer(
        tf_gpu_function_image,
        resource_config=tf_gpu_function_requests,
        labels={
            'watchdog': 'http',
            'workers': '4',
            'cluster': '3',
            'device.edgerun.io/accelerator': 'GPU',
            'device.edgerun.io/vram': '2000',
        })

    deployment = FunctionDeployment(
        tf_gpu_function,
        [tf_gpu_function_container],
        ScalingConfiguration() if scaling_config is None else scaling_config,
        ranking)
    return deployment
def prepare_deployments(self) -> List[FunctionDeployment]:
    """Create the python-pi and resnet50-inference deployments."""
    pi_deployment = FunctionDeployment(
        name='python-pi',
        function_definitions={
            'python-pi-cpu': FunctionDefinition(name='python-pi',
                                                image='python-pi-cpu'),
        },
    )
    resnet_deployment = FunctionDeployment(
        name='resnet50-inference',
        function_definitions={
            'resnet50-inference-gpu': FunctionDefinition(name='resnet50-inference',
                                                         image='resnet50-inference-gpu'),
            'resnet50-inference-cpu': FunctionDefinition(name='resnet50-inference',
                                                         image='resnet50-inference-cpu'),
        },
    )
    return [pi_deployment, resnet_deployment]
def get_fio_deployment(
        ranking: DeploymentRanking,
        scaling_config: ScalingConfiguration = None) -> FunctionDeployment:
    """Build the fio deployment using the container-based API."""
    # Design time: one CPU-only image backing the fio function.
    fio_image = FunctionImage(image=images.fio_manifest)
    fio_fn = Function(name=images.fio_function, fn_images=[fio_image])

    # Run time: container with its resource requests and placement labels.
    fio_requests = KubernetesResourceConfiguration.create_from_str(
        cpu="1000m", memory="200Mi")
    fio_container = FunctionContainer(
        fio_image,
        resource_config=fio_requests,
        labels={'watchdog': 'http', 'workers': '4', 'cluster': '1'})

    scaling = scaling_config if scaling_config is not None else ScalingConfiguration()
    return FunctionDeployment(fio_fn, [fio_container], scaling, ranking)
def prepare_resnet_inference_deployment(self):
    """Create the resnet50-inference deployment, preferring GPU over CPU."""
    # Design time
    fn_name = 'resnet50-inference'
    cpu_image_name = 'resnet50-inference-cpu'
    gpu_image_name = 'resnet50-inference-gpu'
    gpu_image = FunctionImage(image=gpu_image_name)
    cpu_image = FunctionImage(image=cpu_image_name)
    inference_fn = Function(fn_name, fn_images=[gpu_image, cpu_image])

    # Run time
    # CPU container keeps the default kubernetes requested resources.
    cpu_container = FunctionContainer(cpu_image)
    # GPU container carries custom requested resources.
    gpu_requests = KubernetesResourceConfiguration.create_from_str(
        cpu='100m', memory='1024Mi')
    gpu_container = FunctionContainer(gpu_image, resource_config=gpu_requests)

    return FunctionDeployment(
        inference_fn,
        [cpu_container, gpu_container],
        ScalingConfiguration(),
        DeploymentRanking([gpu_image_name, cpu_image_name]))
def get_resnet50_inference_deployment(fet_oracle: FetOracle, resource_oracle: ResourceOracle,
                                      ranking: DeploymentRanking) -> FunctionDeployment:
    """Assemble the resnet50-inference deployment with a GPU and a CPU image."""
    cpu_characterization = FunctionCharacterization(
        images.resnet50_inference_cpu_manifest, fet_oracle, resource_oracle)
    gpu_characterization = FunctionCharacterization(
        images.resnet50_inference_gpu_manifest, fet_oracle, resource_oracle)

    model_name = storage.resnet_model_bucket_item.name
    storage_labels = {
        'data.skippy.io/receives-from-storage': '103M',
        'data.skippy.io/receives-from-storage/path': f'{storage.resnet_model_bucket}/{model_name}',
    }

    cpu_fn = FunctionDefinition(
        name=images.resnet50_inference_function,
        image=images.resnet50_inference_cpu_manifest,
        characterization=cpu_characterization,
        labels={'watchdog': 'http', 'workers': '4', 'cluster': '4a'})
    cpu_fn.requests = Resources.from_str("150Mi", "1000m")
    cpu_fn.labels.update(storage_labels)

    gpu_fn = FunctionDefinition(
        name=images.resnet50_inference_function,
        image=images.resnet50_inference_gpu_manifest,
        characterization=gpu_characterization,
        labels={'watchdog': 'http', 'workers': '4',
                'device.edgerun.io/accelerator': 'GPU',
                'device.edgerun.io/vram': '1500',
                'cluster': '4b'})
    gpu_fn.requests = Resources.from_str("400Mi", "1000m")
    gpu_fn.labels.update(storage_labels)

    deployment = FunctionDeployment(
        images.resnet50_inference_function,
        {
            images.resnet50_inference_gpu_manifest: gpu_fn,
            images.resnet50_inference_cpu_manifest: cpu_fn,
        },
        ranking,
    )
    deployment.function_factor = {
        images.resnet50_inference_gpu_manifest: 1,
        images.resnet50_inference_cpu_manifest: 1,
    }
    return deployment
def get_resnet50_inference_deployment(
        ranking: DeploymentRanking,
        scaling_config: ScalingConfiguration = None) -> FunctionDeployment:
    """Build the resnet50-inference deployment using the container-based API."""
    # Design time: one function backed by a GPU and a CPU image.
    cpu_image = FunctionImage(image=images.resnet50_inference_cpu_manifest)
    gpu_image = FunctionImage(image=images.resnet50_inference_gpu_manifest)
    inference_fn = Function(
        images.resnet50_inference_function,
        fn_images=[gpu_image, cpu_image])

    # Run time: both containers fetch the same model from storage.
    model_name = storage.resnet_model_bucket_item.name
    storage_labels = {
        'data.skippy.io/receives-from-storage': '103M',
        'data.skippy.io/receives-from-storage/path': f'{storage.resnet_model_bucket}/{model_name}',
    }

    cpu_requests = KubernetesResourceConfiguration.create_from_str(
        cpu="1000m", memory="150Mi")
    cpu_container = FunctionContainer(
        cpu_image,
        resource_config=cpu_requests,
        labels={'watchdog': 'http', 'workers': '4', 'cluster': '4a'})
    cpu_container.labels.update(storage_labels)

    gpu_requests = KubernetesResourceConfiguration.create_from_str(
        cpu="1000m", memory="400Mi")
    gpu_container = FunctionContainer(
        gpu_image,
        resource_config=gpu_requests,
        labels={'watchdog': 'http', 'workers': '4',
                'device.edgerun.io/accelerator': 'GPU',
                'device.edgerun.io/vram': '1500',
                'cluster': '4b'})
    gpu_container.labels.update(storage_labels)

    scaling = scaling_config if scaling_config is not None else ScalingConfiguration()
    return FunctionDeployment(
        inference_fn, [gpu_container, cpu_container], scaling, ranking)
def get_resnet_preprocessing_deployment(fet_oracle: FetOracle, resource_oracle: ResourceOracle,
                                        ranking: DeploymentRanking) -> FunctionDeployment:
    """Build the resnet50-preprocessing deployment using the oracle-based API.

    Fix: adds the ``-> FunctionDeployment`` return annotation that the other
    oracle-based builders in this file already carry.

    Args:
        fet_oracle: oracle for function execution time characterization.
        resource_oracle: oracle for resource usage characterization.
        ranking: preference order over the deployment's images.

    Returns:
        The configured FunctionDeployment for the single preprocessing image.
    """
    resnet_preprocessing_characterization = FunctionCharacterization(
        images.resnet50_preprocessing_manifest, fet_oracle, resource_oracle)

    data = storage.resnet_pre_bucket_item.name
    # Preprocessing both reads the raw data and writes the preprocessed output.
    data_storage_labels = {
        'data.skippy.io/receives-from-storage': '14M',
        'data.skippy.io/sends-to-storage': '14M',
        'data.skippy.io/receives-from-storage/path': f'{storage.resnet_pre_bucket}/{data}',
        'data.skippy.io/sends-to-storage/path': f'{storage.resnet_pre_bucket}/preprocessed'
    }

    resnet_preprocessing_function = FunctionDefinition(
        name=images.resnet50_preprocessing_function,
        image=images.resnet50_preprocessing_manifest,
        characterization=resnet_preprocessing_characterization,
        labels={'watchdog': 'http', 'workers': '4', 'cluster': '1'}
    )
    resnet_preprocessing_function.labels.update(data_storage_labels)
    resnet_preprocessing_function.requests = Resources.from_str("100Mi", "1000m")

    deployment = FunctionDeployment(
        images.resnet50_preprocessing_function,
        {
            images.resnet50_preprocessing_manifest: resnet_preprocessing_function,
        },
        ranking
    )
    deployment.function_factor = {
        images.resnet50_preprocessing_manifest: 1
    }
    return deployment
def prepare_python_pi_deployment(self):
    """Create the python-pi deployment with one CPU image and default scaling."""
    # Design Time
    pi_image = FunctionImage(image='python-pi-cpu')
    pi_function = Function('python-pi', fn_images=[pi_image])

    # Run time: container with default requested resources.
    pi_container = FunctionContainer(pi_image)
    return FunctionDeployment(pi_function,
                              [pi_container],
                              ScalingConfiguration())
def get_resnet_preprocessing_deployment(
        ranking: DeploymentRanking,
        scaling_config: ScalingConfiguration = None) -> FunctionDeployment:
    """Build the resnet50-preprocessing deployment using the container-based API.

    Fixes:
    - The data-storage labels were applied to the design-time ``Function``
      object; every other container-based builder in this file attaches them
      to the run-time ``FunctionContainer``, so they are moved there.
    - Adds the ``-> FunctionDeployment`` return annotation used by the sibling
      builders.

    Args:
        ranking: preference order over the deployment's images.
        scaling_config: optional scaling settings; a fresh
            ScalingConfiguration is used when None.

    Returns:
        The configured FunctionDeployment for the single preprocessing image.
    """
    # Design time
    resnet_preprocessing_function_image = FunctionImage(
        image=images.resnet50_preprocessing_manifest)
    resnet_preprocessing_function = Function(
        name=images.resnet50_preprocessing_function,
        fn_images=[resnet_preprocessing_function_image])

    # Run time
    data = storage.resnet_pre_bucket_item.name
    # Preprocessing both reads the raw data and writes the preprocessed output.
    data_storage_labels = {
        'data.skippy.io/receives-from-storage': '14M',
        'data.skippy.io/sends-to-storage': '14M',
        'data.skippy.io/receives-from-storage/path': f'{storage.resnet_pre_bucket}/{data}',
        'data.skippy.io/sends-to-storage/path': f'{storage.resnet_pre_bucket}/preprocessed'
    }
    resnet_preprocessing_function_requests = KubernetesResourceConfiguration.create_from_str(
        cpu="1000m", memory="100Mi")
    resnet_preprocessing_function_container = FunctionContainer(
        fn_image=resnet_preprocessing_function_image,
        resource_config=resnet_preprocessing_function_requests,
        labels={'watchdog': 'http', 'workers': '4', 'cluster': '1'})
    # Storage labels belong on the run-time container (matches the speech and
    # mobilenet builders), not on the design-time Function.
    resnet_preprocessing_function_container.labels.update(data_storage_labels)

    deployment = FunctionDeployment(
        resnet_preprocessing_function,
        [resnet_preprocessing_function_container],
        ScalingConfiguration() if scaling_config is None else scaling_config,
        ranking)
    return deployment
def prepare_resnet_training_deployment(self):
    """Create the CPU-only resnet50-training deployment."""
    # Design time
    training_cpu = 'resnet50-training-cpu'
    cpu_image = FunctionImage(image=training_cpu)
    training_fn = Function('resnet50-training', fn_images=[cpu_image])

    # Run time: default requested resources, CPU-only ranking.
    cpu_container = FunctionContainer(cpu_image)
    return FunctionDeployment(training_fn,
                              [cpu_container],
                              ScalingConfiguration(),
                              DeploymentRanking([training_cpu]))
def prepare_resnet_inference_deployment(self):
    """Create the CPU-only resnet50-inference deployment."""
    # Design time
    inference_cpu = 'resnet50-inference-cpu'
    cpu_image = FunctionImage(image=inference_cpu)
    inference_fn = Function('resnet50-inference', fn_images=[cpu_image])

    # Run time: default requested resources, CPU-only ranking.
    cpu_container = FunctionContainer(cpu_image)
    return FunctionDeployment(inference_fn,
                              [cpu_container],
                              ScalingConfiguration(),
                              DeploymentRanking([inference_cpu]))
def get_speech_inference_deployment(
        ranking: DeploymentRanking,
        scaling_config: ScalingConfiguration = None) -> FunctionDeployment:
    """Build the speech-inference deployment using the container-based API."""
    # Design time: one function backed by a GPU and a tflite image.
    gpu_image = FunctionImage(image=images.speech_inference_gpu_manifest)
    tflite_image = FunctionImage(image=images.speech_inference_tflite_manifest)
    speech_fn = Function(
        images.speech_inference_function,
        fn_images=[gpu_image, tflite_image])

    # Run time
    tflite_item = storage.speech_model_tflite_bucket_item
    tflite_storage_labels = {
        'data.skippy.io/receives-from-storage': '48M',
        'data.skippy.io/receives-from-storage/path': f'{storage.speech_bucket}/{tflite_item.name}',
    }
    gpu_item = storage.speech_model_gpu_bucket_item
    # Size excludes the scorer object (~900M), which is used to improve accuracy
    # but made no runtime difference in simple bash benchmarks.
    gpu_storage_labels = {
        'data.skippy.io/receives-from-storage': '188M',
        'data.skippy.io/receives-from-storage/path': f'{storage.speech_bucket}/{gpu_item.name}',
    }

    gpu_requests = KubernetesResourceConfiguration.create_from_str(
        cpu="1000m", memory="300Mi")
    gpu_container = FunctionContainer(
        gpu_image,
        resource_config=gpu_requests,
        labels={
            'watchdog': 'http',
            'workers': '4',
            'cluster': '0',
            'device.edgerun.io/accelerator': 'GPU',
            'device.edgerun.io/vram': '1500',
        })
    gpu_container.labels.update(gpu_storage_labels)

    tflite_requests = KubernetesResourceConfiguration.create_from_str(
        cpu="1000m", memory="100Mi")
    tflite_container = FunctionContainer(
        tflite_image,
        resource_config=tflite_requests,
        labels={'watchdog': 'http', 'workers': '4', 'cluster': '1'})
    tflite_container.labels.update(tflite_storage_labels)

    scaling = scaling_config if scaling_config is not None else ScalingConfiguration()
    return FunctionDeployment(
        speech_fn, [gpu_container, tflite_container], scaling, ranking)
def get_mobilenet_inference_deployment(
        ranking: DeploymentRanking,
        scaling_config: ScalingConfiguration = None) -> FunctionDeployment:
    """Build the mobilenet-inference deployment using the container-based API."""
    # Design time: one function backed by a TPU and a tflite image.
    tpu_image = FunctionImage(image=images.mobilenet_inference_tpu_manifest)
    tflite_image = FunctionImage(image=images.mobilenet_inference_tflite_manifest)
    mobilenet_fn = Function(images.mobilenet_inference_function,
                            fn_images=[tpu_image, tflite_image])

    # Run time
    tflite_model = storage.mobilenet_model_tflite_bucket_item.name
    tflite_storage_labels = {
        'data.skippy.io/receives-from-storage': '4M',
        'data.skippy.io/receives-from-storage/path': f'{storage.mobilenet_bucket}/{tflite_model}',
    }
    tpu_model = storage.mobilenet_model_tpu_bucket_item.name
    tpu_storage_labels = {
        'data.skippy.io/receives-from-storage': '4M',
        'data.skippy.io/receives-from-storage/path': f'{storage.mobilenet_bucket}/{tpu_model}',
    }

    tpu_requests = KubernetesResourceConfiguration.create_from_str(
        cpu="1000m", memory="100Mi")
    tpu_container = FunctionContainer(
        tpu_image,
        resource_config=tpu_requests,
        labels={'watchdog': 'http', 'workers': '4', 'cluster': '1b',
                'device.edgerun.io/accelerator': 'TPU'})
    tpu_container.labels.update(tpu_storage_labels)

    tflite_requests = KubernetesResourceConfiguration.create_from_str(
        cpu="1000m", memory="100Mi")
    tflite_container = FunctionContainer(
        tflite_image,
        resource_config=tflite_requests,
        labels={'watchdog': 'http', 'workers': '4', 'cluster': '1a'})
    tflite_container.labels.update(tflite_storage_labels)

    deployment = FunctionDeployment(
        mobilenet_fn,
        [tpu_container, tflite_container],
        ScalingConfiguration() if scaling_config is None else scaling_config,
        ranking)
    deployment.function_factor = {
        images.mobilenet_inference_tpu_manifest: 1,
        images.mobilenet_inference_tflite_manifest: 1,
    }
    return deployment