def test_range_operation_single_thread(self):
    """
    Check that the 'Range' operation, which is only supported in
    single-threaded local mode, raises an Exception on the Spark backend.
    """
    backend = Spark()

    with self.assertRaises(Exception):
        backend.check_supported("Range")
def use(backend_name, conf={}):
    """
    Allows the user to choose the execution backend.

    Args:
        backend_name (str): The name of the chosen backend.

        conf (dict, optional): A dictionary with the necessary
            configuration parameters. Its default value is an empty
            dictionary {}.
    """
    future_backends = ["dask"]

    global current_backend

    if backend_name in future_backends:
        msg = "This backend environment will be considered in the future!"
        raise NotImplementedError(msg)
    elif backend_name == "local":
        current_backend = Local(conf)
    elif backend_name == "spark":
        from PyRDF.backend.Spark import Spark
        current_backend = Spark(conf)
    elif backend_name == "AWS":
        from PyRDF.backend.AWS import AWS
        current_backend = AWS(conf)
    else:
        msg = "Incorrect backend environment \"{}\"".format(backend_name)
        raise Exception(msg)
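# Minimal usage sketch for `use` (assumption: it is exposed as `PyRDF.use`
# at the package level, matching the `PyRDF.backend.*` imports above).
# The 'npartitions' key is the one exercised by the Spark tests below; any
# other keys are forwarded to the chosen backend unchanged.
import PyRDF

# Run locally with the default (empty) configuration.
PyRDF.use("local")

# Run on Spark, requesting 5 partitions for the distributed dataset.
PyRDF.use("spark", {"npartitions": 5})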
def test_initialization_method(self):
    """
    Check the initialization method in the Spark backend.

    Declare a variable in the ROOT interpreter called 'userValue' which
    stores the value defined by the user on the python side.
    """
    def init(value):
        import ROOT
        cpp_code = '''int userValue = %s ;''' % value
        ROOT.gInterpreter.ProcessLine(cpp_code)

    PyRDF.initialize(init, 123)
    PyRDF.current_backend = Spark()
    # The Spark backend has a limited list of supported methods, so we use
    # Histo1D, which is a supported action.
    # The code below creates an RDataFrame instance with a single entry
    # and defines a column 'u' whose value is taken from the variable
    # 'userValue'.
    # This variable is only declared inside the ROOT interpreter; however,
    # its value is passed by the user from the python side.
    # If the init function defined by the user is properly propagated to
    # the Spark backend, each worker will run the init function as a first
    # step and hence the variable 'userValue' will be defined at runtime.
    # As a result, the Define operation should read the variable
    # 'userValue' and assign it to the entries of the column 'u' (only one
    # entry).
    # Finally, Histo1D returns a histogram filled with that single value.
    # The mean of a single value is the value itself, independently of
    # the number of spawned workers.
    df = PyRDF.RDataFrame(1).Define("u", "userValue").Histo1D("u")
    h = df.GetValue()

    self.assertEqual(h.GetMean(), 123)
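# Illustrative sketch only: one way an initialization hook registered via
# PyRDF.initialize(init, 123) could be executed on every Spark worker
# before the actual event-loop work, as the comments above describe. The
# names `distribute_with_init` and `run_on_partition` are hypothetical and
# NOT part of PyRDF's real internals.
def distribute_with_init(sc, ranges, init_fn, mapper):
    """Map `mapper` over `ranges`, running `init_fn` first in each task so
    interpreter-level state (e.g. 'userValue') exists on the worker before
    any RDataFrame operation touches it."""
    def run_on_partition(entry_range):
        init_fn()                   # e.g. lambda: init(123), declares userValue
        return mapper(entry_range)  # then process this range of entries

    return sc.parallelize(ranges, len(ranges)).map(run_on_partition).collect()

# Hypothetical usage: distribute_with_init(sc, ranges, lambda: init(123), mapper)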
def test_npartitions_default(self):
    """
    Check that the default number of partitions is correctly set when
    no input value is given in the config dictionary.
    """
    backend = Spark()

    self.assertEqual(backend.npartitions, Spark.MIN_NPARTITIONS)
def test_npartitions_with_num_executors(self):
    """
    Check that the number of partitions is correctly set to the number
    of executors when no explicit 'npartitions' value is given in the
    config dictionary.
    """
    backend = Spark({'spark.executor.instances': 10})

    self.assertEqual(backend.npartitions, 10)
def test_set_npartitions_explicit(self):
    """
    Check that the number of partitions is correctly set for a given
    input value in the config dictionary.
    """
    backend = Spark({"npartitions": 5})

    self.assertEqual(backend.npartitions, 5)
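# Sketch of the partition-count resolution that the three tests above
# exercise, written as a standalone helper. The precedence used when both
# 'npartitions' and 'spark.executor.instances' are present is an
# assumption; the tests only pin down each case in isolation.
MIN_NPARTITIONS = 2  # assumed default; the real value lives in Spark.MIN_NPARTITIONS


def resolve_npartitions(config):
    """Return the number of partitions implied by a config dictionary."""
    if "npartitions" in config:
        return int(config["npartitions"])                # explicit user choice
    if "spark.executor.instances" in config:
        return int(config["spark.executor.instances"])   # one partition per executor
    return MIN_NPARTITIONS                               # fallback default


# resolve_npartitions({}) == MIN_NPARTITIONS
# resolve_npartitions({"spark.executor.instances": 10}) == 10
# resolve_npartitions({"npartitions": 5}) == 5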
def test_unsupported_operations(self):
    """Check that unsupported operations raise an Exception."""
    backend = Spark()

    with self.assertRaises(Exception):
        backend.check_supported("Take")

    with self.assertRaises(Exception):
        backend.check_supported("Foreach")

    with self.assertRaises(Exception):
        backend.check_supported("Range")
def test_set_spark_context_default(self):
    """
    Check that if the config dictionary is empty, a `SparkContext`
    object is still created with default options for the current system.
    """
    backend = Spark()

    self.assertDictEqual(backend.config, {})
    self.assertIsInstance(backend.sparkContext, SparkContext)
def test_npartitions_with_already_existing_spark_context(self):
    """
    Check that the number of partitions is correctly set when a
    SparkContext already exists.
    """
    from pyspark import SparkConf
    sparkConf = SparkConf().set('spark.executor.instances', 15)
    SparkContext(conf=sparkConf)
    backend = Spark()

    self.assertEqual(backend.npartitions, 15)
def test_set_spark_context_with_conf(self):
    """
    Check that a `SparkContext` object is correctly created for a given
    `SparkConf` object in the config dictionary.
    """
    backend = Spark({'spark.app.name': 'my-pyspark-app1'})

    self.assertIsInstance(backend.sparkContext, SparkContext)

    appname = backend.sparkContext.getConf().get('spark.app.name')
    self.assertEqual(appname, 'my-pyspark-app1')
def test_none(self):
    """Check that incorrect operations raise an Exception."""
    backend = Spark()

    with self.assertRaises(Exception):
        backend.check_supported("random")
def test_transformation(self):
    """Check that transformation nodes are classified accurately."""
    backend = Spark()
    backend.check_supported("Define")
def test_action(self):
    """Check that action nodes are classified accurately."""
    backend = Spark()
    backend.check_supported("Histo1D")
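# Minimal sketch of a check_supported implementation consistent with the
# tests above: supported operations live in a whitelist and anything else
# raises. The class name `SparkLike` and the exact contents of
# `supported_operations` are assumptions; the tests only pin down the
# behaviour for "Define", "Histo1D", "Take", "Foreach", "Range" and
# unknown names.
class SparkLike(object):
    supported_operations = {"Define", "Filter", "Histo1D", "Count"}

    def check_supported(self, operation_name):
        """Raise if `operation_name` is not supported by this backend."""
        if operation_name not in self.supported_operations:
            raise Exception(
                "The current backend doesn't support \"{}\"".format(operation_name)
            )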