def test_resourceprofile(self):
    """Check resource requests directly, on a built profile, and via an RDD.

    The same expectations are verified three times: on the raw request
    objects, on the profile produced by the builder, and on the profile
    returned by an RDD that was created with ``withResources``. An RDD
    created without a profile must report ``None``.
    """
    builder = ResourceProfileBuilder()
    exec_requests = (
        ExecutorResourceRequests()
        .cores(2)
        .memory("6g")
        .memoryOverhead("1g")
    )
    exec_requests.pysparkMemory("2g").resource(
        "gpu", 2, "testGpus", "nvidia.com"
    )
    task_requests = TaskResourceRequests().cpus(2).resource("gpu", 2)

    def verify(executor_view, task_view):
        # Executor side: five entries, each with the amount requested above.
        self.assertEqual(len(executor_view), 5)
        expected_exec_amounts = (
            ("cores", 2),
            ("memory", 6144),
            ("memoryOverhead", 1024),
            ("pyspark.memory", 2048),
            ("gpu", 2),
        )
        for key, amount in expected_exec_amounts:
            self.assertEqual(executor_view[key].amount, amount)

        # The GPU request additionally carries script, name and vendor.
        gpu_request = executor_view["gpu"]
        self.assertEqual(gpu_request.discoveryScript, "testGpus")
        self.assertEqual(gpu_request.resourceName, "gpu")
        self.assertEqual(gpu_request.vendor, "nvidia.com")

        # Task side: two entries, both with amount 2.0.
        self.assertEqual(len(task_view), 2)
        for key in ("cpus", "gpu"):
            self.assertEqual(task_view[key].amount, 2.0)

    verify(exec_requests.requests, task_requests.requests)

    profile = builder.require(exec_requests).require(task_requests).build
    verify(profile.executorResources, profile.taskResources)

    profiled_rdd = self.sc.parallelize(range(10)).withResources(profile)
    returned_profile = profiled_rdd.getResourceProfile()
    verify(
        returned_profile.executorResources,
        returned_profile.taskResources,
    )

    plain_rdd = self.sc.parallelize(range(10))
    self.assertEqual(plain_rdd.getResourceProfile(), None)
def test_profile_before_sc(self):
    """Mix resource objects created before and after the SparkContext starts.

    Requests and a builder are created first, then a SparkContext is
    started inside the test. The test then checks three combinations:

    * a profile built entirely before the context, attached to an RDD;
    * requests created before the context fed to a builder created after;
    * requests created after the context fed to the builder created before.

    The context started here is always stopped, even when an assertion
    fails, so it cannot leak into other tests.
    """
    rpb = ResourceProfileBuilder()
    ereqs = ExecutorResourceRequests().cores(2).memory("6g").memoryOverhead("1g")
    ereqs.pysparkMemory("2g").offheapMemory("3g").resource(
        "gpu", 2, "testGpus", "nvidia.com"
    )
    treqs = TaskResourceRequests().cpus(2).resource("gpu", 2)

    def assert_request_contents(exec_reqs, task_reqs):
        # Executor side: six entries with the amounts requested above.
        self.assertEqual(len(exec_reqs), 6)
        self.assertEqual(exec_reqs["cores"].amount, 2)
        self.assertEqual(exec_reqs["memory"].amount, 6144)
        self.assertEqual(exec_reqs["memoryOverhead"].amount, 1024)
        self.assertEqual(exec_reqs["pyspark.memory"].amount, 2048)
        self.assertEqual(exec_reqs["offHeap"].amount, 3072)
        self.assertEqual(exec_reqs["gpu"].amount, 2)
        self.assertEqual(exec_reqs["gpu"].discoveryScript, "testGpus")
        self.assertEqual(exec_reqs["gpu"].resourceName, "gpu")
        self.assertEqual(exec_reqs["gpu"].vendor, "nvidia.com")
        # Task side: two entries, both with amount 2.0.
        self.assertEqual(len(task_reqs), 2)
        self.assertEqual(task_reqs["cpus"].amount, 2.0)
        self.assertEqual(task_reqs["gpu"].amount, 2.0)

    assert_request_contents(ereqs.requests, treqs.requests)
    rp = rpb.require(ereqs).require(treqs).build
    assert_request_contents(rp.executorResources, rp.taskResources)

    # The context is started only now, after the objects above exist.
    from pyspark import SparkContext, SparkConf

    sc = SparkContext(conf=SparkConf())
    try:
        rdd = sc.parallelize(range(10)).withResources(rp)
        return_rp = rdd.getResourceProfile()
        assert_request_contents(
            return_rp.executorResources, return_rp.taskResources
        )

        # intermix objects created before SparkContext init and after
        rpb2 = ResourceProfileBuilder()
        # use reqs created before SparkContext with Builder after
        rpb2.require(ereqs)
        rpb2.require(treqs)
        rp2 = rpb2.build
        self.assertGreater(rp2.id, 0)
        rdd2 = sc.parallelize(range(10)).withResources(rp2)
        return_rp2 = rdd2.getResourceProfile()
        assert_request_contents(
            return_rp2.executorResources, return_rp2.taskResources
        )

        ereqs2 = (
            ExecutorResourceRequests().cores(2).memory("6g").memoryOverhead("1g")
        )
        # Configure ereqs2 itself, mirroring ereqs, rather than re-applying
        # these settings to the pre-context ereqs object.
        ereqs2.pysparkMemory("2g").offheapMemory("3g").resource(
            "gpu", 2, "testGpus", "nvidia.com"
        )
        treqs2 = TaskResourceRequests().cpus(2).resource("gpu", 2)
        # use reqs created after SparkContext with Builder before
        rpb.require(ereqs2)
        rpb.require(treqs2)
        rp3 = rpb.build
        assert_request_contents(rp3.executorResources, rp3.taskResources)
    finally:
        # Always release the context so a failed assertion does not leave
        # a running SparkContext behind.
        sc.stop()