def get_total_resources(cls, master_resources, environment, cluster, is_distributed):
        """Accumulate master, worker and ps resources into a single dict.

        Worker and ps resources are obtained from ``cls.get_worker_resources``
        and ``cls.get_ps_resources`` using the given ``environment``,
        ``cluster`` and ``is_distributed`` values. The values of both results
        are then added, together with ``master_resources``, to a fresh
        ``PodResourcesConfig``.

        Returns:
            ``None`` when master, worker and ps resources are all falsy,
            otherwise the ``to_dict()`` form of the accumulated config.
        """
        lookup_kwargs = dict(
            environment=environment,
            cluster=cluster,
            is_distributed=is_distributed,
        )
        workers = cls.get_worker_resources(**lookup_kwargs)
        servers = cls.get_ps_resources(**lookup_kwargs)

        # Nothing was requested anywhere: there is no total to report.
        if not (master_resources or workers or servers):
            return None

        total = PodResourcesConfig()
        if master_resources:
            total += master_resources

        # Workers are added before parameter servers; a falsy group
        # contributes nothing.
        for replica_group in (workers, servers):
            for replica_resources in six.itervalues(replica_group or {}):
                total += replica_resources

        return total.to_dict()
Exemple #2
0
    def test_pytorch_config(self):
        """Round-trip a PytorchConfig dict while worker settings are added."""
        config_dict = {'n_workers': 10}

        def check_roundtrip():
            # Parsing then serializing must give back a dict that
            # assert_equal_dict accepts as equal to the input.
            parsed = PytorchConfig.from_dict(config_dict)
            assert_equal_dict(config_dict, parsed.to_dict())

        check_roundtrip()

        # Add default worker resources
        default_resources = PodResourcesConfig(
            cpu=K8SResourcesConfig(0.5, 1),
            tpu=K8SResourcesConfig(1, 1),
            gpu=K8SResourcesConfig(2, 4),
        )
        config_dict['default_worker'] = {'resources': default_resources.to_dict()}
        check_roundtrip()

        # Adding custom resources for worker 4
        worker_resources = PodResourcesConfig(
            cpu=K8SResourcesConfig(0.5, 1),
            memory=K8SResourcesConfig(256, 400),
        )
        config_dict['worker'] = [
            {'index': 4, 'resources': worker_resources.to_dict()},
        ]
        check_roundtrip()
Exemple #3
0
    def test_tensorflow_config(self):
        """Round-trip a TensorflowConfig dict while worker/ps settings are added.

        Also checks that supplying a ``run_config`` entry makes ``from_dict``
        raise ``ValidationError``.
        """
        config_dict = {'n_workers': 10, 'n_ps': 5}

        def check_light_roundtrip():
            # Compare the input against the light serialization of the parsed config.
            parsed = TensorflowConfig.from_dict(config_dict)
            assert_equal_dict(config_dict, parsed.to_light_dict())

        def cpu_memory_resources():
            # Fresh cpu + memory resources dict, shared shape for several sections below.
            return PodResourcesConfig(
                cpu=K8SResourcesConfig(0.5, 1),
                memory=K8SResourcesConfig(256, 400),
            ).to_dict()

        # The bare replica counts are compared against the full serialization.
        parsed_config = TensorflowConfig.from_dict(config_dict)
        assert_equal_dict(config_dict, parsed_config.to_dict())

        # Add run config
        config_dict['run_config'] = TFRunConfig().to_dict()
        with self.assertRaises(ValidationError):
            TensorflowConfig.from_dict(config_dict)
        del config_dict['run_config']

        # Add default worker resources
        default_worker_resources = PodResourcesConfig(
            cpu=K8SResourcesConfig(0.5, 1),
            gpu=K8SResourcesConfig(2, 4),
            tpu=K8SResourcesConfig(2, 8),
        )
        config_dict['default_worker'] = {'resources': default_worker_resources.to_dict()}
        check_light_roundtrip()

        # Add default ps resources
        config_dict['default_ps'] = {'resources': cpu_memory_resources()}
        check_light_roundtrip()

        # Adding custom resources for worker 4
        config_dict['worker'] = [{'index': 4, 'resources': cpu_memory_resources()}]
        check_light_roundtrip()

        # Adding custom resources for ps 4
        config_dict['ps'] = [{'index': 4, 'resources': cpu_memory_resources()}]
        check_light_roundtrip()
    def test_pod_resources_add(self):
        """Adding two PodResourcesConfig objects sums requests and limits per resource."""
        left_dict = {
            'cpu': {'requests': 0.8},
            'gpu': {'requests': 2},
            'tpu': {'requests': 2, 'limits': 4},
            'memory': {'requests': 200, 'limits': 300},
        }
        right_dict = {
            'gpu': {'limits': 4},
            'tpu': {'requests': 2},
            'memory': {'requests': 300, 'limits': 200},
        }

        left = PodResourcesConfig.from_dict(left_dict)
        right = PodResourcesConfig.from_dict(right_dict)
        combined = left + right

        # Expected per-resource result, checked in this order.
        expected_by_resource = (
            ('cpu', {'requests': 0.8}),
            ('memory', {'requests': 500, 'limits': 500}),
            ('gpu', {'requests': 2, 'limits': 4}),
            ('tpu', {'requests': 4, 'limits': 4}),
        )
        for resource_name, expected in expected_by_resource:
            assert getattr(combined, resource_name).to_dict() == expected
    def test_environment_config(self):
        """Round-trip an EnvironmentConfig dict as sections are added one by one."""
        cpu_only = PodResourcesConfig(cpu=K8SResourcesConfig(0.5, 1))
        config_dict = {'resources': cpu_only.to_dict()}

        def check_roundtrip():
            # Re-parse the current dict and compare it with its serialization.
            parsed = EnvironmentConfig.from_dict(config_dict)
            assert_equal_dict(config_dict, parsed.to_dict())

        check_roundtrip()

        # Add node selectors
        config_dict['node_selector'] = {'polyaxon.com': 'master'}
        check_roundtrip()

        # Add persistence
        config_dict['persistence'] = {
            'data': ['data1', 'data2'],
            'outputs': 'outputs1',
        }
        check_roundtrip()

        # Add outputs
        config_dict['outputs'] = {
            'jobs': ['data1.dfs', 34, 'data2'],
            'experiments': [1, 'outputs1', 2, 3],
        }
        check_roundtrip()

        # Add secrets
        config_dict['secret_refs'] = ['secret1', 'secret2']
        check_roundtrip()

        # Add configmaps
        config_dict['configmap_refs'] = ['configmap1', 'configmap2']
        check_roundtrip()
 def test_pod_resources_config(self):
     """Round-trip a PodResourcesConfig dict with requests and limits for every resource."""
     resources_dict = {
         'cpu': {'requests': 0.8, 'limits': 1},
         'gpu': {'requests': 2, 'limits': 4},
         'tpu': {'requests': 2, 'limits': 4},
         'memory': {'requests': 265, 'limits': 512},
     }
     # Parsing then serializing should give back the input dict.
     assert_equal_dict(
         resources_dict,
         PodResourcesConfig.from_dict(resources_dict).to_dict(),
     )
Exemple #7
0
    def test_notebook_environment_config(self):
        """Round-trip a NotebookConfig dict and exercise the ``backend`` field.

        The 'notebook' and 'lab' backends must round-trip; 'foo' must make
        ``from_dict`` raise ``ValidationError``.
        """
        env_resources = PodResourcesConfig(cpu=K8SResourcesConfig(0.5, 1))
        config_dict = {
            'environment': {'resources': env_resources.to_dict()},
        }

        def check_roundtrip():
            parsed = NotebookConfig.from_dict(config_dict)
            assert_equal_dict(config_dict, parsed.to_dict())

        # No backend specified yet.
        check_roundtrip()

        for backend in ('notebook', 'lab'):
            config_dict['backend'] = backend
            check_roundtrip()

        config_dict['backend'] = 'foo'
        with self.assertRaises(ValidationError):
            NotebookConfig.from_dict(config_dict)
Exemple #8
0
    def test_experiment_environment_config(self):
        """Check environment/framework combinations on experiment configs.

        First round-trips an ExperimentEnvironmentConfig dict and checks that
        an extra 'foo' key is rejected. Then wraps the same dict in an
        ExperimentConfig and steps through several ``framework`` values and
        replica layouts, asserting which ones round-trip and which ones raise
        ``ValidationError``.
        """

        def check_roundtrip(config_cls, payload):
            parsed = config_cls.from_dict(payload)
            assert_equal_dict(payload, parsed.to_dict())

        def check_rejected(config_cls, payload):
            with self.assertRaises(ValidationError):
                config_cls.from_dict(payload)

        environment_dict = {
            'resources': PodResourcesConfig(cpu=K8SResourcesConfig(0.5, 1)).to_dict(),
            'replicas': {
                'n_workers': 10,
                'n_ps': 5,
            },
        }
        check_roundtrip(ExperimentEnvironmentConfig, environment_dict)

        # Add some field should raise
        environment_dict['foo'] = {
            'n_workers': 10,
            'n_ps': 5,
        }
        check_rejected(ExperimentEnvironmentConfig, environment_dict)
        del environment_dict['foo']

        # The experiment dict holds the very same environment dict object, so
        # later edits through experiment_config['environment'] change it too.
        experiment_config = {
            'environment': environment_dict,
            'framework': 'tensorflow',
        }
        check_roundtrip(ExperimentConfig, experiment_config)

        # Removing framework tensorflow should raise
        del experiment_config['framework']
        check_rejected(ExperimentConfig, experiment_config)

        # Using unknown framework should raise
        experiment_config['framework'] = 'foo'
        check_rejected(ExperimentConfig, experiment_config)

        # Using known framework
        experiment_config['framework'] = 'mxnet'
        check_roundtrip(ExperimentConfig, experiment_config)

        # Adding horovod should raise
        experiment_config['framework'] = 'horovod'
        check_rejected(ExperimentConfig, experiment_config)

        # Setting correct horovod replicas should pass
        experiment_config['environment']['replicas'] = {'n_workers': 5}
        check_roundtrip(ExperimentConfig, experiment_config)

        # Adding pytorch should pass
        experiment_config['framework'] = 'pytorch'
        check_roundtrip(ExperimentConfig, experiment_config)

        # Setting wrong pytorch replicas should raise
        experiment_config['environment']['replicas'] = {
            'n_workers': 5,
            'n_ps': 1,
        }
        check_rejected(ExperimentConfig, experiment_config)
Exemple #9
0
    def test_environment_config(self):
        """Round-trip an EnvironmentConfig dict as fields are added step by step.

        After every step the whole accumulated dict is parsed with
        ``EnvironmentConfig.from_dict`` and compared to ``to_dict()``.
        """
        config_dict = {
            'resources': PodResourcesConfig(cpu=K8SResourcesConfig(0.5, 1)).to_dict(),
        }

        # Each entry is applied to config_dict in order, followed by one
        # round-trip check; the first (empty) step checks the initial dict.
        steps = [
            {},
            # Add node selectors
            {'node_selector': {'polyaxon.com': 'master'}},
            # Add labels
            {'labels': {'foo': 'bar'}},
            # Add annotations
            {'annotations': {'foo': 'bar'}},
            # Add persistence
            {'data_refs': ['data1', 'data2'], 'artifact_refs': ['outputs1']},
            # Add outputs
            {
                'outputs': {
                    'jobs': ['data1.dfs', 34, 'data2'],
                    'experiments': [1, 'outputs1', 2, 3],
                },
            },
            # Add secrets
            {'secret_refs': ['secret1', 'secret2']},
            # Add config_maps
            {'config_map_refs': ['config_map1', 'config_map2']},
            # Add service_account
            {'service_account': 'service_account'},
            # Add image_pull_secrets
            {'image_pull_secrets': ['pull_secret1', 'pull_secret2']},
            # Add max_restarts
            {'max_restarts': 4},
            # Add data_refs (overwrites the value set in the persistence step)
            {'data_refs': ['data1', 'data2']},
            # Add artifact_refs (overwrites the value set in the persistence step)
            {'artifact_refs': ['artifact1', 'artifact2']},
        ]

        for added_fields in steps:
            config_dict.update(added_fields)
            parsed = EnvironmentConfig.from_dict(config_dict)
            assert_equal_dict(config_dict, parsed.to_dict())