コード例 #1
0
    def testTaskPerNodeNotSetRetrieval(self):
        """Check the resolved cluster spec when `tasks_per_node` is not passed.

        The resolver is built for one 'ps' task and two 'worker' tasks with a
        base port of 8888, and its `cluster_spec()` is compared against a
        text-format proto listing one host per task.
        """
        # NOTE(review): the host names below presumably come from Slurm
        # environment variables patched in outside this method -- confirm
        # against the enclosing test class.
        expected_proto = """
      job {
        name: "ps"
        tasks {
          value: "t02n13:8888"
        }
      }
      job {
        name: "worker"
        tasks {
          value: "t02n41:8888"
        }
        tasks {
          key: 1
          value: "t02n43:8888"
        }
      }
    """
        job_counts = {'ps': 1, 'worker': 2}
        resolver = SlurmClusterResolver(
            jobs=job_counts,
            port_base=8888,
            gpus_per_node=1,
            gpus_per_task=1,
            auto_set_gpu=False)

        self._verifyClusterSpecEquality(resolver.cluster_spec(),
                                        expected_proto)
コード例 #2
0
    def testSimpleSuccessfulRetrieval(self):
        """Check the resolved cluster spec with one task per node.

        Builds a resolver for one 'ps' task and two 'worker' tasks with
        `tasks_per_node=1`, then compares `cluster_spec()` with the expected
        text-format proto (each task on its own host at port 8888).
        """
        # Keyword arguments are collected first so the constructor call
        # itself stays on a single line.
        resolver_kwargs = dict(
            jobs={'ps': 1, 'worker': 2},
            port_base=8888,
            tasks_per_node=1,
            gpus_per_node=1,
            gpus_per_task=1,
            auto_set_gpu=False)
        resolver = SlurmClusterResolver(**resolver_kwargs)

        resolved_spec = resolver.cluster_spec()
        expected_proto = """
    job { name: 'ps' tasks { value: 't02n13:8888' } }
    job { name: 'worker' tasks { key: 0 value: 't02n41:8888' }
                         tasks { key: 1 value: 't02n43:8888' } }
    """
        self._verifyClusterSpecEquality(resolved_spec, expected_proto)
コード例 #3
0
  def testTaskPerNodeNotSetRetrieval(self):
    """Check the resolved cluster spec when `tasks_per_node` is not passed.

    One 'ps' task and two 'worker' tasks are requested with a base port of
    8888; the expected text-format proto lists one host per task.
    """
    expected_proto = """
    job { name: 'ps' tasks { value: 't02n13:8888' } }
    job { name: 'worker' tasks { key: 0 value: 't02n41:8888' }
                         tasks { key: 1 value: 't02n43:8888' } }
    """
    job_counts = {'ps': 1, 'worker': 2}
    resolver = SlurmClusterResolver(
        jobs=job_counts,
        port_base=8888,
        gpus_per_node=1,
        gpus_per_task=1,
        auto_set_gpu=False)

    self._verifyClusterSpecEquality(resolver.cluster_spec(), expected_proto)
コード例 #4
0
    def testMultipleGpusPerTaskRetrieval(self):
        """Check cluster spec and GPU assignment with two GPUs per task.

        The resolver is built for one 'ps' task and four 'worker' tasks with
        `gpus_per_node=4`, `gpus_per_task=2` and `auto_set_gpu=True`. The
        expected proto places two tasks on each host (ports 8888 and 8889),
        and after resolution `CUDA_VISIBLE_DEVICES` is expected to be '2,3'.
        """
        slurm_cluster_resolver = SlurmClusterResolver(
            jobs={
                'ps': 1,
                'worker': 4
            },
            port_base=8888,
            gpus_per_node=4,
            gpus_per_task=2,
            auto_set_gpu=True)

        actual_cluster_spec = slurm_cluster_resolver.cluster_spec()
        expected_proto = """
    job { name: 'ps' tasks { value: 't02n13:8888' } }
    job { name: 'worker' tasks { key: 0 value: 't02n13:8889' }
                         tasks { key: 1 value: 't02n41:8888' }
                         tasks { key: 2 value: 't02n41:8889' }
                         tasks { key: 3 value: 't02n43:8888' } }
    """

        self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto)
        # Use the TestCase assertion instead of a bare `assert`: it is not
        # stripped under `python -O` and reports both values on failure.
        # `.get` turns an unset variable into a test failure, not a KeyError.
        self.assertEqual(os.environ.get('CUDA_VISIBLE_DEVICES'), '2,3')
コード例 #5
0
  def testMultipleGpusPerTaskRetrieval(self):
    """Check cluster spec and GPU assignment with two GPUs per task.

    The resolver is built for one 'ps' task and four 'worker' tasks with
    `gpus_per_node=4`, `gpus_per_task=2` and `auto_set_gpu=True`. The
    expected proto places two tasks on each host (ports 8888 and 8889), and
    after resolution `CUDA_VISIBLE_DEVICES` is expected to be '2,3'.
    """
    slurm_cluster_resolver = SlurmClusterResolver(
        jobs={
            'ps': 1,
            'worker': 4
        },
        port_base=8888,
        gpus_per_node=4,
        gpus_per_task=2,
        auto_set_gpu=True)

    actual_cluster_spec = slurm_cluster_resolver.cluster_spec()
    expected_proto = """
    job { name: 'ps' tasks { value: 't02n13:8888' } }
    job { name: 'worker' tasks { key: 0 value: 't02n13:8889' }
                         tasks { key: 1 value: 't02n41:8888' }
                         tasks { key: 2 value: 't02n41:8889' }
                         tasks { key: 3 value: 't02n43:8888' } }
    """

    self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto)
    # Use the TestCase assertion instead of a bare `assert`: it is not
    # stripped under `python -O` and reports both values on failure.
    # `.get` turns an unset variable into a test failure, not a KeyError.
    self.assertEqual(os.environ.get('CUDA_VISIBLE_DEVICES'), '2,3')
コード例 #6
0
    def testSimpleMasterRetrieval(self):
        """Check the addresses returned by `master()` for several call forms.

        Covers three cases in order: no arguments after `task_type` and
        `task_index` are set on the resolver, explicit task arguments after
        `rpc_layer` is set on the resolver, and an explicit `rpc_layer`
        keyword passed to the call.
        """
        resolver = SlurmClusterResolver(
            jobs={'ps': 1, 'worker': 2},
            port_base=8888,
            tasks_per_node=1,
            gpus_per_node=1,
            gpus_per_task=1,
            auto_set_gpu=False)

        # Case 1: task identity comes from attributes set on the resolver;
        # the expected address carries the 'grpc' scheme.
        resolver.task_type = 'worker'
        resolver.task_index = 1
        default_master = resolver.master()
        self.assertEqual(default_master, 'grpc://t02n43:8888')

        # Case 2: the scheme is expected to follow the resolver's rpc_layer
        # attribute once it has been assigned.
        resolver.rpc_layer = 'ab'
        ps_master = resolver.master('ps', 0)
        self.assertEqual(ps_master, 'ab://t02n13:8888')

        # Case 3: an rpc_layer passed to the call is expected to take
        # precedence over the attribute set above.
        overridden_master = resolver.master('ps', 0, rpc_layer='test')
        self.assertEqual(overridden_master, 'test://t02n13:8888')