def test_set_spark_context_default(self):
    """
    Check that a `SparkContext` object is created with default options
    for the current system.
    """
    backend = Backend.SparkBackend()

    self.assertIsInstance(backend.sc, pyspark.SparkContext)
def test_range_operation_single_thread(self):
    """
    Check that the 'Range' operation, which is only supported in
    single-threaded (local) mode, raises an Exception on the distributed
    Spark backend.
    """
    backend = Backend.SparkBackend()

    with self.assertRaises(Exception):
        backend.check_supported("Range")
def RDataFrame(*args, **kwargs):
    """Create a SparkDataFrame object."""
    from PyRDF.Backends.Spark import Backend
    sparkcontext = kwargs.get("sparkcontext", None)
    spark = Backend.SparkBackend(sparkcontext=sparkcontext)
    return spark.make_dataframe(*args, **kwargs)
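# Usage sketch (assumption, not part of the original module): the factory above
# is meant to be called like the ROOT RDataFrame constructor, with its
# arguments forwarded to `make_dataframe`. The import path, tree name and file
# name below are illustrative placeholders.
def _example_rdataframe_usage():
    from PyRDF.Backends.Spark import RDataFrame
    # Build a distributed dataframe over a hypothetical tree/file pair; an
    # existing SparkContext could be reused via the `sparkcontext` keyword.
    df = RDataFrame("myTree", "myfile.root")
    return df.Histo1D("x")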
def test_unsupported_operations(self):
    """Check that unsupported operations raise an Exception."""
    backend = Backend.SparkBackend()

    with self.assertRaises(Exception):
        backend.check_supported("Take")

    with self.assertRaises(Exception):
        backend.check_supported("Foreach")

    with self.assertRaises(Exception):
        backend.check_supported("Range")
def test_optimize_npartitions_with_num_executors(self):
    """
    Check that the number of partitions is set to the number of executors
    found in the `SparkConf` dictionary.
    """
    conf = {"spark.executor.instances": 10}
    sconf = pyspark.SparkConf().setAll(conf.items())
    sc = pyspark.SparkContext(conf=sconf)
    backend = Backend.SparkBackend(sparkcontext=sc)

    self.assertEqual(backend.optimize_npartitions(1), 10)
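# Sketch (assumption): one way `optimize_npartitions` could arrive at the value
# asserted above, by reading "spark.executor.instances" from the SparkContext
# configuration and falling back to the caller-provided default when the key is
# not set. Everything except the conf key name is illustrative.
def _optimize_npartitions_sketch(sparkcontext, default_npartitions):
    executors = sparkcontext.getConf().get("spark.executor.instances")
    # Prefer the configured executor count; otherwise keep the default.
    return int(executors) if executors is not None else default_npartitions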
def test_set_spark_context_with_conf(self):
    """
    Check that a `SparkContext` object is correctly created for a given
    `SparkConf` object in the config dictionary.
    """
    conf = {"spark.app.name": "my-pyspark-app1"}
    sconf = pyspark.SparkConf().setAll(conf.items())
    sc = pyspark.SparkContext(conf=sconf)
    backend = Backend.SparkBackend(sparkcontext=sc)

    self.assertIsInstance(backend.sc, pyspark.SparkContext)
    appname = backend.sc.getConf().get("spark.app.name")
    self.assertEqual(appname, "my-pyspark-app1")
def test_none(self):
    """Check that incorrect operations raise an Exception."""
    backend = Backend.SparkBackend()

    with self.assertRaises(Exception):
        backend.check_supported("random")
def test_transformation(self):
    """Check that transformation nodes are classified accurately."""
    backend = Backend.SparkBackend()

    backend.check_supported("Define")
def test_action(self):
    """Check that action nodes are classified accurately."""
    backend = Backend.SparkBackend()

    backend.check_supported("Histo1D")
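# Sketch (assumption): one way the `check_supported` behaviour exercised by the
# tests above could be implemented. The real backend may keep different lists;
# the attribute and operation names below, beyond those used in the tests, are
# illustrative only.
class _DistributedBackendSketch:
    # Operations that are valid on a distributed backend such as Spark.
    supported_operations = {"Define", "Filter", "Histo1D", "Count", "Sum"}

    def check_supported(self, operation_name):
        """Raise if the operation is unknown or not distributable."""
        if operation_name not in self.supported_operations:
            raise Exception(
                "Operation '{}' is either invalid or not supported on a "
                "distributed backend".format(operation_name)
            )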