Example #1
0
    def test_range_operation_single_thread(self):
        """
        Check that the 'Range' operation works in single-threaded mode and
        raises an Exception in multi-threaded mode.
        """
        spark_backend = Spark()
        # 'Range' is not in the Spark backend's supported-operations list,
        # so check_supported must raise.
        self.assertRaises(Exception, spark_backend.check_supported, "Range")
Example #2
0
def use(backend_name, conf=None):
    """
    Allows the user to choose the execution backend.

    Args:
        backend_name (str): This is the name of the chosen backend.
        conf (dict, optional): This should be a dictionary with
            necessary configuration parameters. Its default value is None,
            which is treated as an empty configuration dictionary.

    Raises:
        NotImplementedError: If the chosen backend is planned but not yet
            supported.
        Exception: If the backend name is not recognized.
    """
    # A mutable default argument ({}) is shared across calls; any mutation
    # made by a backend would leak into later calls. Use a None sentinel
    # and build a fresh dict per call instead.
    if conf is None:
        conf = {}

    future_backends = ["dask"]

    global current_backend

    if backend_name in future_backends:
        msg = "This backend environment will be considered in the future !"
        raise NotImplementedError(msg)
    elif backend_name == "local":
        current_backend = Local(conf)
    elif backend_name == "spark":
        # Imported lazily so that pyspark is only required when requested.
        from PyRDF.backend.Spark import Spark
        current_backend = Spark(conf)
    elif backend_name == "AWS":
        # Imported lazily so that AWS dependencies are only required when
        # requested.
        from PyRDF.backend.AWS import AWS
        current_backend = AWS(conf)
    else:
        msg = "Incorrect backend environment \"{}\"".format(backend_name)
        raise Exception(msg)
Example #3
0
    def test_initialization_method(self):
        """
        Check initialization method in Spark backend.

        Define a method in the ROOT interpreter called getValue which returns
        the value defined by the user on the python side.
        """
        def init(value):
            import ROOT
            ROOT.gInterpreter.ProcessLine("int userValue = %s ;" % value)

        PyRDF.initialize(init, 123)
        PyRDF.current_backend = Spark()
        # The Spark backend only supports a limited set of methods; Histo1D
        # is one of the supported actions, so it is used here.
        # Build an RDataFrame with a single entry and define a column 'u'
        # whose value comes from the interpreter-side variable 'userValue'.
        # That variable is declared only inside the ROOT interpreter, with
        # its value supplied by the user from the python side. If the
        # user-defined init function is correctly propagated, every Spark
        # worker runs it first, so 'userValue' exists at runtime and the
        # Define operation can copy it into the single entry of column 'u'.
        # Histo1D then yields a histogram with that one value, whose mean
        # must equal the value itself regardless of how many workers were
        # spawned.
        histogram_result = (PyRDF.RDataFrame(1)
                            .Define("u", "userValue")
                            .Histo1D("u"))
        self.assertEqual(histogram_result.GetValue().GetMean(), 123)
Example #4
0
    def test_npartitions_default(self):
        """
        Check that the default number of partitions is correctly set when no
        input value is given in the config dictionary.
        """
        # No config supplied, so the backend must fall back to its minimum.
        self.assertEqual(Spark().npartitions, Spark.MIN_NPARTITIONS)
Example #5
0
    def test_npartitions_with_num_executors(self):
        """
        Check that the number of partitions is correctly set to number of
        executors when no input value is given in the config dictionary.
        """
        config = {'spark.executor.instances': 10}
        spark_backend = Spark(config)
        self.assertEqual(spark_backend.npartitions, 10)
Example #6
0
    def test_set_npartitions_explicit(self):
        """
        Check that the number of partitions is correctly set for a given input
        value in the config dictionary.
        """
        explicit_config = {"npartitions": 5}
        self.assertEqual(Spark(explicit_config).npartitions, 5)
Example #7
0
    def test_unsupported_operations(self):
        """Check that unsupported operations raise an Exception."""
        spark_backend = Spark()
        # Each of these operations is outside the Spark backend's supported
        # set and must be rejected.
        for operation in ("Take", "Foreach", "Range"):
            with self.assertRaises(Exception):
                spark_backend.check_supported(operation)
Example #8
0
    def test_set_spark_context_default(self):
        """
        Check that if the config dictionary is empty, a `SparkContext`
        object is still created with default options for the current system.
        """
        default_backend = Spark()
        # No user configuration was supplied ...
        self.assertDictEqual(default_backend.config, {})
        # ... yet a context with system defaults must still exist.
        self.assertIsInstance(default_backend.sparkContext, SparkContext)
Example #9
0
    def test_npartitions_with_already_existing_spark_context(self):
        """
        Check that the number of partitions is correctly set when a Spark
        Context already exists.
        """
        from pyspark import SparkConf
        # Create a context before the backend, carrying an executor count.
        existing_conf = SparkConf().set('spark.executor.instances', 15)
        SparkContext(conf=existing_conf)
        # The backend must pick up the partition count from that context.
        self.assertEqual(Spark().npartitions, 15)
Example #10
0
    def test_set_spark_context_with_conf(self):
        """
        Check that a `SparkContext` object is correctly created for a given
        `SparkConf` object in the config dictionary.
        """
        spark_backend = Spark({'spark.app.name': 'my-pyspark-app1'})
        self.assertIsInstance(spark_backend.sparkContext, SparkContext)
        # The supplied app name must have reached the live context's conf.
        self.assertEqual(
            spark_backend.sparkContext.getConf().get('spark.app.name'),
            'my-pyspark-app1')
Example #11
0
 def test_none(self):
     """Check that incorrect operations raise an Exception."""
     # An operation name the backend does not know must be rejected.
     self.assertRaises(Exception, Spark().check_supported, "random")
Example #12
0
 def test_transformation(self):
     """Check that transformation nodes are classified accurately."""
     spark_backend = Spark()
     # 'Define' is a supported transformation, so no exception is expected.
     spark_backend.check_supported("Define")
Example #13
0
 def test_action(self):
     """Check that action nodes are classified accurately."""
     spark_backend = Spark()
     # 'Histo1D' is a supported action, so no exception is expected.
     spark_backend.check_supported("Histo1D")