def test_pod_resources_add(self):
        """Check that adding two ``PodResourcesConfig`` objects merges them.

        The expectations below pin three cases: a resource present on only
        one operand (cpu), a resource whose requests and limits come from
        different operands (gpu), and a resource defined on both operands
        (memory), whose values are summed.
        """
        left = PodResourcesConfig.from_dict({
            'cpu': {'requests': 0.8},
            'gpu': {'requests': 2},
            'memory': {'requests': 200, 'limits': 300},
        })
        right = PodResourcesConfig.from_dict({
            'gpu': {'limits': 4},
            'memory': {'requests': 300, 'limits': 200},
        })

        combined = left + right

        # cpu only exists on the left operand; its missing limit stays None.
        expected_cpu = {'requests': 0.8, 'limits': None}
        # memory is defined on both sides: 200 + 300 and 300 + 200.
        expected_memory = {'requests': 500, 'limits': 500}
        # gpu requests come from the left operand, limits from the right.
        expected_gpu = {'requests': 2, 'limits': 4}

        assert combined.cpu.to_dict() == expected_cpu
        assert combined.memory.to_dict() == expected_memory
        assert combined.gpu.to_dict() == expected_gpu
Exemple #2
0
    def get_total_resources(cls, master_resources, environment, cluster, is_distributed):
        """Accumulate master, worker and ps resources into a single dict.

        Worker and ps resources are looked up through
        ``cls.get_worker_resources`` and ``cls.get_ps_resources`` using the
        given ``environment``, ``cluster`` and ``is_distributed`` values; each
        result is treated as a mapping whose values are resource configs.

        Returns:
            ``None`` when the master, worker and ps resources are all falsy;
            otherwise the ``to_dict()`` of a ``PodResourcesConfig`` to which
            every available resource config has been added with ``+=``.
        """
        lookup_kwargs = {
            'environment': environment,
            'cluster': cluster,
            'is_distributed': is_distributed,
        }
        worker_resources = cls.get_worker_resources(**lookup_kwargs)
        ps_resources = cls.get_ps_resources(**lookup_kwargs)

        # Nothing to aggregate at all.
        if not (master_resources or worker_resources or ps_resources):
            return None

        accumulated = PodResourcesConfig()
        if master_resources:
            accumulated += master_resources

        # Workers first, then parameter servers; a falsy group contributes nothing.
        for group in (worker_resources, ps_resources):
            for pod_resources in six.itervalues(group or {}):
                accumulated += pod_resources

        return accumulated.to_dict()
    def test_environment_config(self):
        """Round-trip ``EnvironmentConfig`` and swap framework sections.

        Starting from a resources-only dict, a framework section is added and
        the config must round-trip through ``from_dict``/``to_dict``. Adding a
        second framework section is expected to raise ``ValidationError``;
        dropping the previous one must make the dict valid again.
        """
        payload = {
            'resources':
            PodResourcesConfig(cpu=K8SResourcesConfig(0.5, 1)).to_dict()
        }

        def check_roundtrip():
            # Parsing and re-serializing must reproduce the current payload.
            parsed = EnvironmentConfig.from_dict(payload)
            assert_equal_dict(payload, parsed.to_dict())

        def check_rejected():
            # The current payload must fail validation.
            with self.assertRaises(ValidationError):
                EnvironmentConfig.from_dict(payload)

        check_roundtrip()

        # First framework section: tensorflow on its own is accepted.
        payload['tensorflow'] = {
            'n_workers': 10,
            'n_ps': 5,
        }
        check_roundtrip()

        # Each step adds a new framework next to the existing one (should
        # raise), then removes the old one (should pass for the new one).
        framework_swaps = (
            ('tensorflow', 'mxnet', {'n_workers': 10, 'n_ps': 5}),
            ('mxnet', 'horovod', {'n_workers': 5}),
            ('horovod', 'pytorch', {'n_workers': 5}),
        )
        for outgoing, incoming, settings in framework_swaps:
            payload[incoming] = settings
            check_rejected()

            del payload[outgoing]
            check_roundtrip()
    def test_mxnet_config(self):
        """Round-trip ``MXNetConfig`` while progressively adding resources.

        After every addition the dict is parsed with ``from_dict`` and the
        result of ``to_dict`` is compared against the input.
        """
        payload = {
            'n_workers': 10,
            'n_ps': 5,
        }

        def check_roundtrip():
            parsed = MXNetConfig.from_dict(payload)
            assert_equal_dict(payload, parsed.to_dict())

        def cpu_memory_resources(**extra):
            # Build a fresh dict on every call so entries never share state.
            return PodResourcesConfig(
                cpu=K8SResourcesConfig(0.5, 1),
                memory=K8SResourcesConfig(256, 400),
                **extra).to_dict()

        check_roundtrip()

        # Default resources applied to workers.
        payload['default_worker_resources'] = PodResourcesConfig(
            cpu=K8SResourcesConfig(0.5, 1),
            gpu=K8SResourcesConfig(2, 4)).to_dict()
        check_roundtrip()

        # Default resources applied to parameter servers.
        payload['default_ps_resources'] = cpu_memory_resources()
        check_roundtrip()

        # Custom resources for worker 4.
        payload['worker_resources'] = [cpu_memory_resources(index=4)]
        check_roundtrip()

        # Custom resources for ps 4.
        payload['ps_resources'] = [cpu_memory_resources(index=4)]
        check_roundtrip()
 def test_pod_resources_config(self):
     """Check that a fully specified ``PodResourcesConfig`` round-trips.

     The dict sets requests and limits for cpu, gpu and memory; parsing it
     and serializing it again must yield an equal dict.
     """
     payload = {
         'cpu': {'requests': 0.8, 'limits': 1},
         'gpu': {'requests': 2, 'limits': 4},
         'memory': {'requests': 265, 'limits': 512},
     }
     parsed = PodResourcesConfig.from_dict(payload)
     assert_equal_dict(payload, parsed.to_dict())
    def test_tensorflow_config(self):
        """Round-trip ``TensorflowConfig`` while progressively adding options.

        Starting from the worker/ps counts, the test adds a run config,
        default session configs and resources, then per-index session configs
        and resources. After every addition the dict is parsed with
        ``from_dict`` and ``to_dict`` is compared against the input.
        """
        payload = {
            'n_workers': 10,
            'n_ps': 5,
            'delay_workers_by_global_step': False
        }

        def check_roundtrip():
            parsed = TensorflowConfig.from_dict(payload)
            assert_equal_dict(payload, parsed.to_dict())

        def cpu_memory_resources(**extra):
            # Build a fresh dict on every call so entries never share state.
            return PodResourcesConfig(
                cpu=K8SResourcesConfig(0.5, 1),
                memory=K8SResourcesConfig(256, 400),
                **extra).to_dict()

        check_roundtrip()

        # Run config.
        payload['run_config'] = RunConfig().to_dict()
        check_roundtrip()

        # Default session config for workers.
        payload['default_worker_config'] = SessionConfig(
            intra_op_parallelism_threads=1,
            inter_op_parallelism_threads=3).to_dict()
        check_roundtrip()

        # Default resources for workers.
        payload['default_worker_resources'] = PodResourcesConfig(
            cpu=K8SResourcesConfig(0.5, 1),
            gpu=K8SResourcesConfig(2, 4)).to_dict()
        check_roundtrip()

        # Default session config for parameter servers.
        payload['default_ps_config'] = SessionConfig(
            intra_op_parallelism_threads=0,
            inter_op_parallelism_threads=2).to_dict()
        check_roundtrip()

        # Default resources for parameter servers.
        payload['default_ps_resources'] = cpu_memory_resources()
        check_roundtrip()

        # Custom session config for worker 3.
        worker_session = SessionConfig(
            index=3,
            gpu_options=GPUOptionsConfig(gpu_memory_fraction=0.4),
            intra_op_parallelism_threads=8,
            inter_op_parallelism_threads=8)
        payload['worker_configs'] = [worker_session.to_dict()]
        check_roundtrip()

        # Custom resources for worker 4.
        payload['worker_resources'] = [cpu_memory_resources(index=4)]
        check_roundtrip()

        # Custom session config for ps 2.
        ps_session = SessionConfig(
            index=2,
            gpu_options=GPUOptionsConfig(allow_growth=False),
            intra_op_parallelism_threads=1,
            inter_op_parallelism_threads=1)
        payload['ps_configs'] = [ps_session.to_dict()]
        check_roundtrip()

        # Custom resources for ps 4.
        payload['ps_resources'] = [cpu_memory_resources(index=4)]
        check_roundtrip()