def get_total_resources(cls, master_resources, environment, cluster, is_distributed):
    """Add up the master, worker and ps resources and return them as a dict.

    Worker and ps resources are looked up through ``cls.get_worker_resources``
    and ``cls.get_ps_resources``; both are treated as mappings whose values
    are the per-replica resources to add.

    Returns ``None`` when the master, worker and ps resources are all falsy,
    otherwise the ``to_dict()`` of the accumulated ``PodResourcesConfig``.
    """
    lookup_kwargs = {
        'environment': environment,
        'cluster': cluster,
        'is_distributed': is_distributed,
    }
    workers = cls.get_worker_resources(**lookup_kwargs)
    ps_replicas = cls.get_ps_resources(**lookup_kwargs)

    # Nothing was requested anywhere: report "no resources" instead of an
    # empty total.
    if not any((master_resources, workers, ps_replicas)):
        return None

    # Gather the contributions in order: master first, then workers, then ps.
    contributions = []
    if master_resources:
        contributions.append(master_resources)
    contributions.extend(six.itervalues(workers or {}))
    contributions.extend(six.itervalues(ps_replicas or {}))

    total = PodResourcesConfig()
    for resources in contributions:
        total += resources
    return total.to_dict()
def test_pytorch_config(self):
    """Round-trip a PytorchConfig dict while worker resources are added to it."""

    def assert_round_trip(payload):
        # Parse the dict and compare its serialised form with the input.
        parsed = PytorchConfig.from_dict(payload)
        assert_equal_dict(payload, parsed.to_dict())

    config_dict = {'n_workers': 10}
    assert_round_trip(config_dict)

    # Default resources for the workers
    default_resources = PodResourcesConfig(
        cpu=K8SResourcesConfig(0.5, 1),
        tpu=K8SResourcesConfig(1, 1),
        gpu=K8SResourcesConfig(2, 4),
    )
    config_dict['default_worker'] = {'resources': default_resources.to_dict()}
    assert_round_trip(config_dict)

    # Custom resources for the worker at index 4
    worker_resources = PodResourcesConfig(
        cpu=K8SResourcesConfig(0.5, 1),
        memory=K8SResourcesConfig(256, 400),
    )
    config_dict['worker'] = [
        {'index': 4, 'resources': worker_resources.to_dict()},
    ]
    assert_round_trip(config_dict)
def test_tensorflow_config(self):
    """Round-trip a TensorflowConfig dict while worker/ps settings are added."""

    def assert_round_trip(payload, light=True):
        # Parse the dict and compare the serialised form with the input;
        # `light` selects to_light_dict() over to_dict().
        parsed = TensorflowConfig.from_dict(payload)
        if light:
            serialised = parsed.to_light_dict()
        else:
            serialised = parsed.to_dict()
        assert_equal_dict(payload, serialised)

    config_dict = {'n_workers': 10, 'n_ps': 5}
    assert_round_trip(config_dict, light=False)

    # A run_config entry is expected to be rejected
    config_dict['run_config'] = TFRunConfig().to_dict()
    with self.assertRaises(ValidationError):
        TensorflowConfig.from_dict(config_dict)
    del config_dict['run_config']

    # Default resources for the workers
    default_worker_resources = PodResourcesConfig(
        cpu=K8SResourcesConfig(0.5, 1),
        gpu=K8SResourcesConfig(2, 4),
        tpu=K8SResourcesConfig(2, 8),
    )
    config_dict['default_worker'] = {
        'resources': default_worker_resources.to_dict(),
    }
    assert_round_trip(config_dict)

    # Default resources for the ps replicas
    default_ps_resources = PodResourcesConfig(
        cpu=K8SResourcesConfig(0.5, 1),
        memory=K8SResourcesConfig(256, 400),
    )
    config_dict['default_ps'] = {'resources': default_ps_resources.to_dict()}
    assert_round_trip(config_dict)

    # Custom resources for the worker at index 4
    worker_resources = PodResourcesConfig(
        cpu=K8SResourcesConfig(0.5, 1),
        memory=K8SResourcesConfig(256, 400),
    )
    config_dict['worker'] = [
        {'index': 4, 'resources': worker_resources.to_dict()},
    ]
    assert_round_trip(config_dict)

    # Custom resources for the ps at index 4
    ps_resources = PodResourcesConfig(
        cpu=K8SResourcesConfig(0.5, 1),
        memory=K8SResourcesConfig(256, 400),
    )
    config_dict['ps'] = [
        {'index': 4, 'resources': ps_resources.to_dict()},
    ]
    assert_round_trip(config_dict)
def test_pod_resources_add(self):
    """Check the per-resource result of adding two PodResourcesConfig objects."""
    left_dict = {
        'cpu': {'requests': 0.8},
        'gpu': {'requests': 2},
        'tpu': {'requests': 2, 'limits': 4},
        'memory': {'requests': 200, 'limits': 300},
    }
    right_dict = {
        'gpu': {'limits': 4},
        'tpu': {'requests': 2},
        'memory': {'requests': 300, 'limits': 200},
    }
    left = PodResourcesConfig.from_dict(left_dict)
    right = PodResourcesConfig.from_dict(right_dict)

    combined = left + right

    # Checked in a fixed order: cpu, memory, gpu, tpu.
    expected_by_resource = (
        ('cpu', {'requests': 0.8}),
        ('memory', {'requests': 500, 'limits': 500}),
        ('gpu', {'requests': 2, 'limits': 4}),
        ('tpu', {'requests': 4, 'limits': 4}),
    )
    for resource_name, expected in expected_by_resource:
        assert getattr(combined, resource_name).to_dict() == expected
def test_environment_config(self):
    """Round-trip an EnvironmentConfig dict as sections are added one at a time."""

    def assert_round_trip():
        # Parse the current dict and compare its serialised form with it.
        parsed = EnvironmentConfig.from_dict(config_dict)
        assert_equal_dict(config_dict, parsed.to_dict())

    config_dict = {
        'resources': PodResourcesConfig(cpu=K8SResourcesConfig(0.5, 1)).to_dict(),
    }
    assert_round_trip()

    # Each section is added on top of the previous ones and re-checked.
    sections = (
        # node selectors
        ('node_selector', {'polyaxon.com': 'master'}),
        # persistence
        ('persistence', {'data': ['data1', 'data2'], 'outputs': 'outputs1'}),
        # outputs
        ('outputs', {
            'jobs': ['data1.dfs', 34, 'data2'],
            'experiments': [1, 'outputs1', 2, 3],
        }),
        # secrets
        ('secret_refs', ['secret1', 'secret2']),
        # configmaps
        ('configmap_refs', ['configmap1', 'configmap2']),
    )
    for section_key, section_value in sections:
        config_dict[section_key] = section_value
        assert_round_trip()
def test_pod_resources_config(self):
    """Round-trip a PodResourcesConfig dict with cpu, gpu, tpu and memory set."""
    cpu = {'requests': 0.8, 'limits': 1}
    gpu = {'requests': 2, 'limits': 4}
    tpu = {'requests': 2, 'limits': 4}
    memory = {'requests': 265, 'limits': 512}
    config_dict = {'cpu': cpu, 'gpu': gpu, 'tpu': tpu, 'memory': memory}

    parsed = PodResourcesConfig.from_dict(config_dict)
    serialised = parsed.to_dict()
    assert_equal_dict(config_dict, serialised)
def test_notebook_environment_config(self):
    """Round-trip a NotebookConfig dict and check which backends are accepted."""
    environment = {
        'resources': PodResourcesConfig(cpu=K8SResourcesConfig(0.5, 1)).to_dict(),
    }
    config_dict = {'environment': environment}

    # First pass has no 'backend' key at all; later passes set it explicitly.
    for backend in (None, 'notebook', 'lab'):
        if backend is not None:
            config_dict['backend'] = backend
        parsed = NotebookConfig.from_dict(config_dict)
        assert_equal_dict(config_dict, parsed.to_dict())

    # The 'foo' backend is expected to be rejected
    config_dict['backend'] = 'foo'
    with self.assertRaises(ValidationError):
        NotebookConfig.from_dict(config_dict)
def test_experiment_environment_config(self):
    """Check environment/replica validation across the experiment frameworks."""

    def assert_round_trip(config_cls, payload):
        # Parse the dict and compare its serialised form with the input.
        parsed = config_cls.from_dict(payload)
        assert_equal_dict(payload, parsed.to_dict())

    def assert_rejected(config_cls, payload):
        # Parsing the dict must raise ValidationError.
        with self.assertRaises(ValidationError):
            config_cls.from_dict(payload)

    config_dict = {
        'resources': PodResourcesConfig(cpu=K8SResourcesConfig(0.5, 1)).to_dict(),
        'replicas': {'n_workers': 10, 'n_ps': 5},
    }
    assert_round_trip(ExperimentEnvironmentConfig, config_dict)

    # An extra 'foo' field should raise
    config_dict['foo'] = {'n_workers': 10, 'n_ps': 5}
    assert_rejected(ExperimentEnvironmentConfig, config_dict)
    config_dict.pop('foo')

    # config_dict is embedded by reference, so the replica changes made
    # through experiment_config['environment'] below also modify it.
    experiment_config = {
        'environment': config_dict,
        'framework': 'tensorflow',
    }
    assert_round_trip(ExperimentConfig, experiment_config)

    # Removing the framework should raise
    experiment_config.pop('framework')
    assert_rejected(ExperimentConfig, experiment_config)

    # An unknown framework should raise
    experiment_config['framework'] = 'foo'
    assert_rejected(ExperimentConfig, experiment_config)

    # A known framework should pass
    experiment_config['framework'] = 'mxnet'
    assert_round_trip(ExperimentConfig, experiment_config)

    # horovod with the current replicas should raise
    experiment_config['framework'] = 'horovod'
    assert_rejected(ExperimentConfig, experiment_config)

    # horovod with workers-only replicas should pass
    experiment_config['environment']['replicas'] = {'n_workers': 5}
    assert_round_trip(ExperimentConfig, experiment_config)

    # pytorch with the same replicas should pass
    experiment_config['framework'] = 'pytorch'
    assert_round_trip(ExperimentConfig, experiment_config)

    # pytorch with ps replicas should raise
    experiment_config['environment']['replicas'] = {'n_workers': 5, 'n_ps': 1}
    assert_rejected(ExperimentConfig, experiment_config)
def test_environment_config(self):
    """Round-trip an EnvironmentConfig dict as fields are added step by step."""
    config_dict = {
        'resources': PodResourcesConfig(cpu=K8SResourcesConfig(0.5, 1)).to_dict(),
    }

    # Each step lists the (key, value) pairs to set before the next
    # round-trip check; the empty first step checks the base dict alone.
    steps = [
        [],
        # node selectors
        [('node_selector', {'polyaxon.com': 'master'})],
        # labels
        [('labels', {'foo': 'bar'})],
        # annotations
        [('annotations', {'foo': 'bar'})],
        # persistence: both refs are set before a single check
        [('data_refs', ['data1', 'data2']), ('artifact_refs', ['outputs1'])],
        # outputs
        [('outputs', {
            'jobs': ['data1.dfs', 34, 'data2'],
            'experiments': [1, 'outputs1', 2, 3],
        })],
        # secrets
        [('secret_refs', ['secret1', 'secret2'])],
        # config_maps
        [('config_map_refs', ['config_map1', 'config_map2'])],
        # service_account
        [('service_account', 'service_account')],
        # image_pull_secrets
        [('image_pull_secrets', ['pull_secret1', 'pull_secret2'])],
        # max_restarts
        [('max_restarts', 4)],
        # data_refs (set again, on its own)
        [('data_refs', ['data1', 'data2'])],
        # artifact_refs (overwrites the value set in the persistence step)
        [('artifact_refs', ['artifact1', 'artifact2'])],
    ]
    for additions in steps:
        for key, value in additions:
            config_dict[key] = value
        parsed = EnvironmentConfig.from_dict(config_dict)
        assert_equal_dict(config_dict, parsed.to_dict())