Example 1
        >>> f.result() is None               # doctest: +SKIP
        True
        """),
    "foreachPartition": ("""Asynchronously applies a function f to each partition of this RDD
        and returns a :py:class:`concurrent.futures.Future` of this action.

        >>> def g(xs):                       # doctest: +SKIP
        ...     for x in xs:
        ...         print(x)
        >>> rdd = sc.parallelize(range(10))  # doctest: +SKIP
        >>> f = rdd.foreachPartitionAsync(g) # doctest: +SKIP
        >>> f.result() is None               # doctest: +SKIP
        True
        """),
    "take": ("""Returns a :py:class:`concurrent.futures.Future` for retrieving
        the first num elements of the RDD.

        >>> rdd = sc.parallelize(range(10))  # doctest: +SKIP
        >>> f = rdd.takeAsync(3)             # doctest: +SKIP
        >>> f.result()                       # doctest: +SKIP
        [0, 1, 2]
        """),
    "saveAsTextFile": ("""Asynchronously save this RDD as a text file, using string representations of elements
        and returns :py:class:`concurrent.futures.Future` of this action.

        :param path: path to text file
        :param compressionCodecClass: fully qualified name of the compression codec class,
            e.g. "org.apache.hadoop.io.compress.GzipCodec" (None by default)
        """)
}

patch_all(RDD, actions)
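The patch_all helper comes from the asyncactions package, and its internals are not shown in these excerpts. The following is a minimal sketch, an assumption rather than the package's actual code, of what it plausibly does: for each entry in the actions dict it attaches a <name>Async method to the class that runs the blocking action on a thread pool and returns a concurrent.futures.Future, using the dict value as the docstring.

from concurrent.futures import ThreadPoolExecutor

# Hypothetical shared worker pool; the real package may size and scope
# its executor differently.
_POOL = ThreadPoolExecutor(max_workers=4)

def patch_all_sketch(cls, actions):
    """Attach a <name>Async method to cls for every action in the dict.

    Each async variant runs the original blocking method on a worker
    thread and immediately returns a concurrent.futures.Future.
    """
    for name, doc in actions.items():
        def make_wrapper(method_name):
            def async_action(self, *args, **kwargs):
                # Resolve the blocking method on the instance and submit
                # it to the pool; the caller gets a Future right away.
                return _POOL.submit(getattr(self, method_name), *args, **kwargs)
            return async_action

        wrapper = make_wrapper(name)  # factory avoids the late-binding closure pitfall
        wrapper.__name__ = name + "Async"
        wrapper.__doc__ = doc
        setattr(cls, name + "Async", wrapper)

With RDD patched this way, rdd.takeAsync(3) returns immediately, and f.result() blocks only when the value is needed, which matches the doctests above.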
Example 2
        and returns a :py:class:`concurrent.futures.Future` of this action.

        >>> def g(x): print(x)               # doctest: +SKIP
        >>> df = spark.range(10)             # doctest: +SKIP
        >>> f = df.foreachAsync(g)           # doctest: +SKIP
        >>> f.result() is None               # doctest: +SKIP
        True
        """),
    "foreachPartition":
    ("""Asynchronously applies a function f to each partition of this DataFrame
        and returns a :py:class:`concurrent.futures.Future` of this action.

        >>> def g(xs):                       # doctest: +SKIP
        ...     for x in xs:
        ...         print(x)
        >>> df = spark.range(10)             # doctest: +SKIP
        >>> f = df.foreachPartitionAsync(g)  # doctest: +SKIP
        >>> f.result() is None               # doctest: +SKIP
        True
        """),
    "take": ("""Returns a :py:class:`concurrent.futures.Future` for retrieving
        the first num elements of the DataFrame.

        >>> df = spark.range(10)             # doctest: +SKIP
        >>> f = df.takeAsync(3)              # doctest: +SKIP
        >>> f.result()                       # doctest: +SKIP
        [Row(id=0), Row(id=1), Row(id=2)]
        """),
}

patch_all(DataFrame, actions)
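Once patched, the DataFrame gains foreachAsync, foreachPartitionAsync, and takeAsync methods that return futures. A short usage sketch, assuming a running SparkSession bound to the name spark and that importing asyncactions has applied the patches above:

from concurrent.futures import wait

df = spark.range(10)
f1 = df.foreachAsync(lambda row: None)   # returns a Future immediately
f2 = df.takeAsync(3)

wait([f1, f2])                           # block until both actions complete
print(f2.result())                       # [Row(id=0), Row(id=1), Row(id=2)]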
Example 3
from pyspark.ml.base import Estimator
from asyncactions.utils import patch_all

actions = {
    "fit":
    """Asynchronously fits a model to the input dataset with optional parameters.

        :param dataset: input dataset, which is an instance of :py:class:`pyspark.sql.DataFrame`
        :param params: an optional param map that overrides embedded params. If a list/tuple of
                       param maps is given, this calls fit on each param map and returns a list of
                       models.
        :returns: :py:class:`concurrent.futures.Future` of fitted model(s)
        """
}

patch_all(Estimator, actions)
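After patching, any Estimator exposes fitAsync with the same arguments as fit (the Async suffix follows the naming convention seen in the RDD and DataFrame examples above). A usage sketch, assuming a running SparkSession and an illustrative two-row training set:

from pyspark.ml.classification import LogisticRegression
from pyspark.ml.linalg import Vectors

train = spark.createDataFrame(
    [(1.0, Vectors.dense([0.0])), (0.0, Vectors.dense([1.0]))],
    ["label", "features"])

lr = LogisticRegression(maxIter=5)
f = lr.fitAsync(train)   # training runs in the background, f is a Future
model = f.result()       # block until the fitted LogisticRegressionModel is ready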