Ejemplo n.º 1
0
    def test_resourceprofile(self):
        """Check resource requests survive building a profile and attaching it to an RDD.

        The same expectations are verified at three stages: on the raw request
        objects, on the built ResourceProfile, and on the profile read back
        from an RDD it was attached to. An RDD without a profile must report
        ``None``.
        """
        builder = ResourceProfileBuilder()
        executor_requests = ExecutorResourceRequests().cores(2).memory(
            "6g").memoryOverhead("1g")
        executor_requests.pysparkMemory("2g").resource(
            "gpu", 2, "testGpus", "nvidia.com")
        task_requests = TaskResourceRequests().cpus(2).resource("gpu", 2)

        def verify_requests(executor_reqs, task_reqs):
            """Assert both request mappings hold the values configured above."""
            # Executor side: size strings such as "6g" are expected back as
            # the numeric amounts listed here (6144, 1024, 2048).
            self.assertEqual(len(executor_reqs), 5)
            expected_executor_amounts = (
                ("cores", 2),
                ("memory", 6144),
                ("memoryOverhead", 1024),
                ("pyspark.memory", 2048),
                ("gpu", 2),
            )
            for key, amount in expected_executor_amounts:
                self.assertEqual(executor_reqs[key].amount, amount)

            # The custom gpu resource also carries its discovery metadata.
            gpu_request = executor_reqs["gpu"]
            self.assertEqual(gpu_request.discoveryScript, "testGpus")
            self.assertEqual(gpu_request.resourceName, "gpu")
            self.assertEqual(gpu_request.vendor, "nvidia.com")

            # Task side: amounts are compared against floats.
            self.assertEqual(len(task_reqs), 2)
            for key, amount in (("cpus", 2.0), ("gpu", 2.0)):
                self.assertEqual(task_reqs[key].amount, amount)

        # Stage 1: the request objects themselves.
        verify_requests(executor_requests.requests, task_requests.requests)

        # Stage 2: the profile assembled by the builder.
        builder_with_reqs = builder.require(executor_requests).require(
            task_requests)
        profile = builder_with_reqs.build
        verify_requests(profile.executorResources, profile.taskResources)

        # Stage 3: the profile as reported by an RDD it was attached to.
        rdd_with_profile = self.sc.parallelize(range(10)).withResources(
            profile)
        returned_profile = rdd_with_profile.getResourceProfile()
        verify_requests(returned_profile.executorResources,
                        returned_profile.taskResources)

        # An RDD that never had a profile attached reports None.
        plain_rdd = self.sc.parallelize(range(10))
        self.assertEqual(plain_rdd.getResourceProfile(), None)
Ejemplo n.º 2
0
    def test_profile_before_sc(self):
        """Check resource objects created before a SparkContext work after one exists.

        Requests and a builder are created before any SparkContext, verified,
        and then used with a freshly created context. The test also mixes
        pre-context requests with a post-context builder, and post-context
        requests with the pre-context builder.

        The SparkContext created here is always stopped, even when an
        assertion fails, so it cannot leak into other tests.
        """
        rpb = ResourceProfileBuilder()
        ereqs = ExecutorResourceRequests().cores(2).memory(
            "6g").memoryOverhead("1g")
        ereqs.pysparkMemory("2g").offheapMemory("3g").resource(
            "gpu", 2, "testGpus", "nvidia.com")
        treqs = TaskResourceRequests().cpus(2).resource("gpu", 2)

        def assert_request_contents(exec_reqs, task_reqs):
            """Assert both request mappings hold the values configured above."""
            self.assertEqual(len(exec_reqs), 6)
            self.assertEqual(exec_reqs["cores"].amount, 2)
            self.assertEqual(exec_reqs["memory"].amount, 6144)
            self.assertEqual(exec_reqs["memoryOverhead"].amount, 1024)
            self.assertEqual(exec_reqs["pyspark.memory"].amount, 2048)
            self.assertEqual(exec_reqs["offHeap"].amount, 3072)
            self.assertEqual(exec_reqs["gpu"].amount, 2)
            self.assertEqual(exec_reqs["gpu"].discoveryScript, "testGpus")
            self.assertEqual(exec_reqs["gpu"].resourceName, "gpu")
            self.assertEqual(exec_reqs["gpu"].vendor, "nvidia.com")
            self.assertEqual(len(task_reqs), 2)
            self.assertEqual(task_reqs["cpus"].amount, 2.0)
            self.assertEqual(task_reqs["gpu"].amount, 2.0)

        # Everything up to here runs before any SparkContext is created.
        assert_request_contents(ereqs.requests, treqs.requests)
        rp = rpb.require(ereqs).require(treqs).build
        assert_request_contents(rp.executorResources, rp.taskResources)

        from pyspark import SparkContext, SparkConf
        sc = SparkContext(conf=SparkConf())
        try:
            rdd = sc.parallelize(range(10)).withResources(rp)
            return_rp = rdd.getResourceProfile()
            assert_request_contents(return_rp.executorResources,
                                    return_rp.taskResources)

            # intermix objects created before SparkContext init and after
            rpb2 = ResourceProfileBuilder()
            # use reqs created before SparkContext with Builder after
            rpb2.require(ereqs)
            rpb2.require(treqs)
            rp2 = rpb2.build
            self.assertGreater(rp2.id, 0)
            rdd2 = sc.parallelize(range(10)).withResources(rp2)
            return_rp2 = rdd2.getResourceProfile()
            assert_request_contents(return_rp2.executorResources,
                                    return_rp2.taskResources)

            # Configure ereqs2 itself with the full set of requests, so rp3
            # is checked against requests really created after the
            # SparkContext rather than against whatever rpb retained from
            # the earlier require(ereqs).
            ereqs2 = ExecutorResourceRequests().cores(2).memory(
                "6g").memoryOverhead("1g")
            ereqs2.pysparkMemory("2g").offheapMemory("3g").resource(
                "gpu", 2, "testGpus", "nvidia.com")
            treqs2 = TaskResourceRequests().cpus(2).resource("gpu", 2)
            # use reqs created after SparkContext with Builder before
            rpb.require(ereqs2)
            rpb.require(treqs2)
            rp3 = rpb.build
            assert_request_contents(rp3.executorResources, rp3.taskResources)
        finally:
            # Stop the context even if an assertion above failed.
            sc.stop()