Example #1
0
 def test_get_resource_incorrect_input(self):
     """Expect ``ValueError`` when ``require`` gets a mapping plus a scheduler.

     The first argument is a scheduler-name -> ``Resource`` dict and the
     second is an explicit scheduler name; the call must raise.
     """
     small = Resource(cpu=1, gpu=2, memMB=128)
     large = Resource(cpu=1, gpu=2, memMB=256)
     per_scheduler = {"default": small, "test_scheduler": large}

     with self.assertRaises(ValueError):
         Container("torch").require(per_scheduler, "new_scheduler")
Example #2
0
 def test_get_resource_none(self):
     """A scheduler name absent from the required mapping yields NULL_RESOURCE."""
     per_scheduler = {
         "default": Resource(cpu=1, gpu=2, memMB=128),
         "test_scheduler": Resource(cpu=1, gpu=2, memMB=256),
     }
     container = Container("torch").require(per_scheduler)

     looked_up = container.get_resource("non-existent")
     self.assertEqual(NULL_RESOURCE, looked_up)
Example #3
0
 def test_create_container_with_resource(self):
     """Chained ``require`` calls store one resource per scheduler name."""
     default_res = Resource(cpu=1, gpu=2, memMB=128)
     sched_res = Resource(cpu=1, gpu=2, memMB=256)

     # Build the container step by step instead of one fluent chain.
     container = Container("torch")
     container = container.require(default_res, "default")
     container = container.require(sched_res, "test_scheduler")

     registered = container.resources
     self.assertEqual(2, len(registered))
     self.assertEqual(default_res, registered["default"])
     self.assertEqual(sched_res, registered["test_scheduler"])
Example #4
0
 def test_get_resource_mapping(self):
     """Lookups hit the named entry, else fall back to the ``ALL`` entry."""
     default_res = Resource(cpu=1, gpu=2, memMB=128)
     fallback_res = Resource(cpu=1, gpu=2, memMB=256)
     container = Container("torch").require(
         {"default": default_res, ALL: fallback_res}
     )

     self.assertEqual(2, len(container.resources))

     # (scheduler name, resource expected from get_resource)
     expectations = (
         ("default", default_res),
         (ALL, fallback_res),
         ("unknown_scheduler", fallback_res),
     )
     for scheduler, expected in expectations:
         self.assertEqual(expected, container.get_resource(scheduler))
Example #5
0
    def test_copy_resource(self):
        """``Resource.copy`` keeps cpu/gpu/memMB and merges capability overrides.

        The copy must contain the untouched key, the overridden key and the
        newly added key, while the source resource keeps its original value.
        """
        source = Resource(
            1, 2, 3, {"test_key": "test_value", "old_key": "old_value"}
        )
        duplicate = Resource.copy(
            source, test_key="test_value_new", new_key="new_value"
        )

        # Scalar fields are carried over as-is.
        self.assertEqual(duplicate.cpu, 1)
        self.assertEqual(duplicate.gpu, 2)
        self.assertEqual(duplicate.memMB, 3)

        # Capabilities: one kept, one overridden, one added.
        merged = duplicate.capabilities
        self.assertEqual(len(merged), 3)
        for key, expected in (
            ("old_key", "old_value"),
            ("test_key", "test_value_new"),
            ("new_key", "new_value"),
        ):
            self.assertEqual(merged[key], expected)

        # The source resource must not see the override.
        self.assertEqual(source.capabilities["test_key"], "test_value")
Example #6
0
 def test_validate_invalid_replicas(self):
     """Building and running an app whose role has 0 replicas raises ValueError."""
     session = self.MockSession()

     # NOTE(review): the whole build sits inside assertRaises, so a
     # ValueError from any step (not only session.run) satisfies the test.
     # Which call actually raises is not visible here -- confirm before
     # narrowing the block.
     with self.assertRaises(ValueError):
         container = Container("torch")
         container = container.require(Resource(cpu=1, gpu=0, memMB=500))

         role = Role("no container").runs("echo", "hello_world")
         role = role.on(container).replicas(0)

         session.run(Application("no container").of(role))
Example #7
0
    def test_json_serialization(self):
        """
        Round-trips an ElasticRole through a plain dict and back into a Role.

        The ElasticRole is dumped with ``dataclasses.asdict``, its nested
        container/resources dicts are turned back into ``Container`` and
        ``Resource`` objects, and the resulting Role must match the original
        field for field. An ElasticRole is really just a builder utility to
        make it easy for users to create a Role with the entrypoint being
        ``torchelastic.distributed.launch``
        """
        container = Container(
            image="user_image",
            resources={"default": Resource(cpu=1, gpu=0, memMB=512)},
        ).ports(tensorboard=8080)

        builder = ElasticRole(
            "test_role",
            nnodes="2:4",
            rdzv_backend="etcd",
            rdzv_id="foobar",
        )
        elastic_role = (
            builder.runs("user_script.py", "--script_arg", "foo")
            .on(container)
            .replicas(3)
        )

        # this is effectively JSON
        role_fields = dataclasses.asdict(elastic_role)
        container_fields = role_fields.pop("container")

        # asdict() flattened each Resource into a dict; rebuild the objects.
        container_fields["resources"] = {
            sched: Resource(**fields)
            for sched, fields in container_fields.pop("resources").items()
        }

        rebuilt_container = Container(**container_fields)
        role = Role(**role_fields, container=rebuilt_container)

        self.assertEqual(container, role.container)
        self.assertEqual(elastic_role.name, role.name)
        self.assertEqual(elastic_role.entrypoint, role.entrypoint)
        self.assertEqual(elastic_role.args, role.args)
        self.assertEqual(
            dataclasses.asdict(elastic_role), dataclasses.asdict(role)
        )
Example #8
0
 def test_get_resource_specific(self):
     """A resource required for one scheduler is not returned for another."""
     foobar_res = Resource(cpu=1, gpu=2, memMB=128)
     container = Container("torch").require(foobar_res, scheduler="foobar")

     # (scheduler name, resource expected from get_resource)
     cases = (
         ("foobar", foobar_res),
         ("any_scheduler", NULL_RESOURCE),
     )
     for scheduler, expected in cases:
         self.assertEqual(expected, container.get_resource(scheduler))
Example #9
0
 def test_get_resource_all(self):
     """A resource required without a scheduler is returned for any name."""
     wanted = Resource(cpu=1, gpu=2, memMB=128)

     container = Container("torch")
     container = container.require(wanted)

     actual = container.get_resource("any_scheduler")
     self.assertEqual(wanted, actual)
Example #10
0
 def test_create_container_no_backend(self):
     """``require`` without a scheduler stores the resource under ``ALL``."""
     only_res = Resource(cpu=1, gpu=2, memMB=128)
     container = Container("torch").require(only_res)

     registered = container.resources
     self.assertEqual(1, len(registered))
     self.assertEqual(only_res, registered[ALL])
Example #11
0
 def test_create_container_with_resource(self):
     """A second ``require`` call replaces the resource set by the first."""
     first = Resource(cpu=1, gpu=2, memMB=128)
     second = Resource(cpu=1, gpu=2, memMB=256)

     container = Container("torch")
     for requested in (first, second):
         container = container.require(requested)

     self.assertEqual(second, container.resources)
class resource:
    """Named ``Resource`` presets; every preset requests ``gpu=0``.

    cpu and memMB grow together: 1 / 4 / 16 cpus with 1024 / 4096 / 16384
    memMB respectively.
    """

    SMALL = Resource(
        cpu=1,
        gpu=0,
        memMB=1 * 1024,
    )
    MEDIUM = Resource(
        cpu=4,
        gpu=0,
        memMB=4 * 1024,
    )
    LARGE = Resource(
        cpu=16,
        gpu=0,
        memMB=16 * 1024,
    )