def read_single_udf(pickleSer, infile, eval_type, runner_conf, udf_index):
    num_arg = read_int(infile)
    arg_offsets = [read_int(infile) for i in range(num_arg)]
    row_func = None
    for i in range(read_int(infile)):
        f, return_type = read_command(pickleSer, infile)
        if row_func is None:
            row_func = f
        else:
            row_func = chain(row_func, f)

    # make sure StopIterations raised in the user code are not ignored
    # when they are processed in a for loop; raise them as RuntimeErrors instead
    func = fail_on_stopiteration(row_func)

    # the last returnType in the chain is the return type of the UDF
    if eval_type == PythonEvalType.SQL_SCALAR_PANDAS_UDF:
        return arg_offsets, wrap_scalar_pandas_udf(func, return_type)
    elif eval_type == PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF:
        argspec = _get_argspec(row_func)  # signature was lost when wrapping it
        return arg_offsets, wrap_grouped_map_pandas_udf(func, return_type, argspec, runner_conf)
    elif eval_type == PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF:
        return arg_offsets, wrap_grouped_agg_pandas_udf(func, return_type)
    elif eval_type == PythonEvalType.SQL_WINDOW_AGG_PANDAS_UDF:
        return arg_offsets, wrap_window_agg_pandas_udf(func, return_type, runner_conf, udf_index)
    elif eval_type == PythonEvalType.SQL_BATCHED_UDF:
        return arg_offsets, wrap_udf(func, return_type)
    else:
        raise ValueError("Unknown eval type: {}".format(eval_type))

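# The chain() call above composes multiple UDFs that were applied one inside
# the other. A minimal sketch of that worker-side helper, assuming it simply
# feeds the output of one function into the next (hypothetical reconstruction,
# not necessarily the exact source):
def chain(f, g):
    """Chain two functions together so that g consumes f's output."""
    return lambda *a: g(f(*a))
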
def _create_judf(self):
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()
    sc = spark.sparkContext

    func = fail_on_stopiteration(self.func)
    wrapped_func = _wrap_function(sc, func, self.returnType)
    jdt = spark._jsparkSession.parseDataType(self.returnType.json())
    judf = sc._jvm.org.apache.spark.sql.execution.python.UserDefinedPythonFunction(
        self._name, wrapped_func, jdt, self.evalType, self.deterministic)
    return judf

def _create_judf(self):
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()
    sc = spark.sparkContext

    func = fail_on_stopiteration(self.func)

    # for pandas UDFs the worker needs to know if the function takes
    # one or two arguments, but the signature is lost when wrapping with
    # fail_on_stopiteration, so we store it here
    if self.evalType in (PythonEvalType.SQL_SCALAR_PANDAS_UDF,
                         PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF,
                         PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF):
        func._argspec = _get_argspec(self.func)

    wrapped_func = _wrap_function(sc, func, self.returnType)
    jdt = spark._jsparkSession.parseDataType(self.returnType.json())
    judf = sc._jvm.org.apache.spark.sql.execution.python.UserDefinedPythonFunction(
        self._name, wrapped_func, jdt, self.evalType, self.deterministic)
    return judf

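# _get_argspec has to capture the user function's signature before the
# fail_on_stopiteration wrapper hides it. A minimal sketch, assuming it just
# dispatches on the Python version (hypothetical reconstruction):
import inspect
import sys

def _get_argspec(f):
    """Return the argspec of f, using the Python-3 aware API when available."""
    if sys.version_info[0] < 3:
        return inspect.getargspec(f)
    return inspect.getfullargspec(f)
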
def read_single_udf(pickleSer, infile, eval_type):
    num_arg = read_int(infile)
    arg_offsets = [read_int(infile) for i in range(num_arg)]
    row_func = None
    for i in range(read_int(infile)):
        f, return_type = read_command(pickleSer, infile)
        if row_func is None:
            row_func = f
        else:
            row_func = chain(row_func, f)

    # make sure StopIterations raised in the user code are not ignored
    # when they are processed in a for loop; raise them as RuntimeErrors instead
    func = fail_on_stopiteration(row_func)

    # the last returnType in the chain is the return type of the UDF
    if eval_type == PythonEvalType.SQL_SCALAR_PANDAS_UDF:
        return arg_offsets, wrap_scalar_pandas_udf(func, return_type)
    elif eval_type == PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF:
        return arg_offsets, wrap_grouped_map_pandas_udf(func, return_type)
    else:
        return arg_offsets, wrap_udf(func, return_type)

def __init__(self, createCombiner, mergeValue, mergeCombiners):
    # guard each user-supplied aggregation function: a StopIteration raised
    # inside them should fail the task, not silently end the merge loop
    self.createCombiner = fail_on_stopiteration(createCombiner)
    self.mergeValue = fail_on_stopiteration(mergeValue)
    self.mergeCombiners = fail_on_stopiteration(mergeCombiners)

def _fail_on_stopiteration(fn):
    # noinspection PyPackageRequirements
    from pyspark import util
    return util.fail_on_stopiteration(fn)

def _func(_, iterator):
    # apply the guarded predicate lazily across the partition iterator
    return filter(fail_on_stopiteration(_fn), iterator)

def _func(_, iterator):
    # chain here is itertools.chain (not the worker-side chain() above):
    # flatten the iterables produced by the guarded function
    return chain.from_iterable(map(fail_on_stopiteration(_fn), iterator))

def _func(_, iterator):
    # map the guarded function over the partition iterator
    return map(util.fail_on_stopiteration(_fn), iterator)

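# Every snippet above defends against the same hazard: a StopIteration that
# escapes user code into map/filter/reduce machinery would silently end the
# surrounding loop and drop data. A minimal sketch of the guard, assuming the
# behavior described in the comments above (hypothetical reconstruction of
# pyspark.util.fail_on_stopiteration):
def fail_on_stopiteration(f):
    """
    Wrap f so that a StopIteration raised inside the user's code is re-raised
    as a RuntimeError, failing the task instead of truncating its output.
    """
    def wrapper(*args, **kwargs):
        try:
            return f(*args, **kwargs)
        except StopIteration as exc:
            raise RuntimeError(
                "Caught StopIteration thrown from user's code; failing the task", exc)
    return wrapper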