Exemple #1
0
def test_unary_positive(data_gen):
    """GPU and CPU must agree on the unary '+' operator applied to a column."""
    def build(spark):
        return unary_op_df(spark, data_gen).selectExpr('+a')
    assert_gpu_and_cpu_are_equal_collect(build)
Exemple #2
0
def test_decimal_round(data_gen):
    """round() at default, negative, positive, and oversized scales matches CPU."""
    round_exprs = ['round(a)', 'round(a, -1)', 'round(a, 1)', 'round(a, 10)']
    def build(spark):
        return unary_op_df(spark, data_gen).selectExpr(*round_exprs)
    assert_gpu_and_cpu_are_equal_collect(
        build, conf=allow_negative_scale_of_decimal_conf)
Exemple #3
0
def test_degrees_small(data_gen):
    """degrees(a) over the given generator must match between GPU and CPU."""
    def build(spark):
        return unary_op_df(spark, data_gen).selectExpr('degrees(a)')
    assert_gpu_and_cpu_are_equal_collect(build)
Exemple #4
0
def test_coalesce_df(num_parts, length):
    """DataFrame.coalesce over basic + decimal generators matches the CPU result."""
    # This should change eventually to be more than just the basic gens
    gen_list = []
    for idx, gen in enumerate(all_basic_gens + decimal_gens):
        gen_list.append(('_c' + str(idx), gen))
    def build(spark):
        return gen_df(spark, gen_list, length=length).coalesce(num_parts)
    assert_gpu_and_cpu_are_equal_collect(build)
Exemple #5
0
def test_initcap_special_chars():
    """initcap over special Unicode characters (e.g. 'ʼn') must match the CPU."""
    special_gen = mk_str_gen('ʼn([aAbB13ȺéŸ]{0,5}){1,5}')
    def build(spark):
        return unary_op_df(spark, special_gen).select(f.initcap(f.col('a')))
    assert_gpu_and_cpu_are_equal_collect(build)
Exemple #6
0
def test_union_by_missing_col_name(data_gen):
    """unionByName(allowMissingColumns=True) on disjoint column names matches CPU."""
    def build(spark):
        left = binary_op_df(spark, data_gen).withColumnRenamed("a", "x")
        right = binary_op_df(spark, data_gen).withColumnRenamed("a", "y")
        return left.unionByName(right, True)
    assert_gpu_and_cpu_are_equal_collect(build)
Exemple #7
0
def test_union_by_name(data_gen):
    """Plain unionByName of two identically-named frames matches CPU."""
    def build(spark):
        left = binary_op_df(spark, data_gen)
        right = binary_op_df(spark, data_gen)
        return left.unionByName(right)
    assert_gpu_and_cpu_are_equal_collect(build)
Exemple #8
0
def test_single_orderby_with_limit(data_gen, order):
    """orderBy on one column followed by limit(100) matches CPU."""
    def build(spark):
        return unary_op_df(spark, data_gen).orderBy(order).limit(100)
    assert_gpu_and_cpu_are_equal_collect(build)
Exemple #9
0
def test_single_nested_orderby_with_limit(data_gen, order):
    """orderBy + limit(100) with CPU range partitioning disallowed matches CPU."""
    no_cpu_range_conf = {'spark.rapids.allowCpuRangePartitioning': False}
    def build(spark):
        return unary_op_df(spark, data_gen).orderBy(order).limit(100)
    assert_gpu_and_cpu_are_equal_collect(build, conf=no_cpu_range_conf)
Exemple #10
0
def test_array_cast_fallback():
    """Casting an array<int> column to array<string> must match the CPU result.

    NOTE(review): the inner helper's name ('cast_float_to_double') does not
    describe what it does — it casts an int-array column to a string-array.
    Kept as-is here; a rename would be a cosmetic follow-up.
    """
    def cast_float_to_double(spark):
        frame = two_col_df(spark, int_gen, ArrayGen(int_gen))
        return frame.select(frame.b.cast(ArrayType(StringType())))
    assert_gpu_and_cpu_are_equal_collect(cast_float_to_double)
Exemple #11
0
def test_array_cast_bad_from_good_to_fallback(child_gen, child_to_type):
    """Casting an array column to a different element type matches the CPU."""
    def cast_array(spark):
        frame = two_col_df(spark, int_gen, ArrayGen(child_gen))
        return frame.select(frame.b.cast(ArrayType(child_to_type)))
    assert_gpu_and_cpu_are_equal_collect(cast_array)
Exemple #12
0
def test_array_element_at_all_null_ansi_not_fail(data_gen):
    """element_at with a large index under ANSI mode must not fail and must match CPU."""
    ansi_conf = {'spark.sql.ansi.enabled': True,
                 'spark.sql.legacy.allowNegativeScaleOfDecimal': True}
    def build(spark):
        return unary_op_df(spark, data_gen).select(element_at(col('a'), 100))
    assert_gpu_and_cpu_are_equal_collect(build, conf=ansi_conf)
Exemple #13
0
def test_columnar_pow(data_gen):
    """pow(a, b) over two generated columns matches CPU."""
    def build(spark):
        return binary_op_df(spark, data_gen).selectExpr('pow(a, b)')
    assert_gpu_and_cpu_are_equal_collect(build)
Exemple #14
0
def test_columnar_asinh_improved(data_gen):
    """asinh(a) with improved float ops enabled matches CPU."""
    improved_float_conf = {'spark.rapids.sql.improvedFloatOps.enabled': 'true'}
    def build(spark):
        return unary_op_df(spark, data_gen).selectExpr('asinh(a)')
    assert_gpu_and_cpu_are_equal_collect(build, improved_float_conf)
Exemple #15
0
def test_passing_gpuExpr_as_Expr(enableVectorizedConf):
    """A select/dropna/groupBy/agg/orderBy/cache/limit pipeline built from
    Column expressions matches CPU under the supplied vectorization conf."""
    def build(spark):
        cleaned = unary_op_df(spark, string_gen).select(f.col("a")).na.drop()
        counted = cleaned.groupBy(f.col("a")).agg(
            f.count(f.col("a")).alias("count_a"))
        return counted.orderBy(
            f.col("count_a").desc(), f.col("a")).cache().limit(50)
    assert_gpu_and_cpu_are_equal_collect(build, enableVectorizedConf)
Exemple #16
0
def test_single_sort_in_part(data_gen, order):
    """sortWithinPartitions on one column matches CPU."""
    def build(spark):
        return unary_op_df(spark, data_gen).sortWithinPartitions(order)
    assert_gpu_and_cpu_are_equal_collect(
        build, conf=allow_negative_scale_of_decimal_conf)
Exemple #17
0
def test_union_struct_missing_children(data_gen):
    """unionByName(allowMissingColumns=True) over struct columns whose children
    differ between the two sides matches CPU."""
    left_gen, right_gen = data_gen
    def build(spark):
        left = binary_op_df(spark, left_gen)
        right = binary_op_df(spark, right_gen)
        return left.unionByName(right, True)
    assert_gpu_and_cpu_are_equal_collect(build)
Exemple #18
0
def test_multi_orderby(data_gen):
    """orderBy on two columns (a ascending, b descending) matches CPU."""
    def build(spark):
        return binary_op_df(spark, data_gen).orderBy(
            f.col('a'), f.col('b').desc())
    assert_gpu_and_cpu_are_equal_collect(
        build, conf=allow_negative_scale_of_decimal_conf)
Exemple #19
0
 def assert_union_equal(gen1, gen2):
     """Check unionByName(allowMissingColumns=True) parity for two generators."""
     def build(spark):
         left = unary_op_df(spark, gen1)
         right = unary_op_df(spark, gen2)
         return left.unionByName(right, True)
     assert_gpu_and_cpu_are_equal_collect(build)
Exemple #20
0
def test_multi_orderby_with_limit(data_gen):
    """orderBy(a asc, b desc) followed by limit(100) matches CPU."""
    def build(spark):
        ordered = binary_op_df(spark, data_gen).orderBy(
            f.col('a'), f.col('b').desc())
        return ordered.limit(100)
    assert_gpu_and_cpu_are_equal_collect(build)
Exemple #21
0
def test_coalesce_types(data_gen):
    """coalesce(2) over a generated frame matches CPU."""
    def build(spark):
        return gen_df(spark, data_gen).coalesce(2)
    assert_gpu_and_cpu_are_equal_collect(build)
Exemple #22
0
def test_orderby_with_processing_and_limit(data_gen):
    """orderBy on a computed expression plus limit(100) matches CPU."""
    def build(spark):
        # avoid ambiguity in the order by statement for floating point by
        # including a as a backup ordering column
        return unary_op_df(spark, data_gen).orderBy(
            f.lit(100) - f.col('a'), f.col('a')).limit(100)
    assert_gpu_and_cpu_are_equal_collect(build)
Exemple #23
0
def test_repeat_column_and_column():
    """repeat(a, b) where both the string and the count are columns matches CPU."""
    str_gen = StringGen(nullable=True)
    count_gen = IntegerGen(
        min_val=-100, max_val=100, special_cases=[0], nullable=True)
    def build(spark):
        return two_col_df(spark, str_gen, count_gen).selectExpr('repeat(a, b)')
    assert_gpu_and_cpu_are_equal_collect(build)
Exemple #24
0
def test_large_orderby():
    """A large orderBy forced through small GPU batches matches CPU."""
    small_batch_conf = {'spark.rapids.sql.batchSizeBytes': '16384'}
    def build(spark):
        return unary_op_df(
            spark, long_gen, length=1024 * 128).orderBy(f.col('a'))
    assert_gpu_and_cpu_are_equal_collect(build, conf=small_batch_conf)
Exemple #25
0
def test_ceil(data_gen):
    """ceil(a) matches CPU, with negative decimal scale allowed."""
    def build(spark):
        return unary_op_df(spark, data_gen).selectExpr('ceil(a)')
    assert_gpu_and_cpu_are_equal_collect(
        build, conf=allow_negative_scale_of_decimal_conf)
Exemple #26
0
def test_single_orderby(data_gen, order):
    """orderBy on a single column matches CPU."""
    def build(spark):
        return unary_op_df(spark, data_gen).orderBy(order)
    assert_gpu_and_cpu_are_equal_collect(
        build, conf=allow_negative_scale_of_decimal_conf)
Exemple #27
0
def test_bit_not(data_gen):
    """Bitwise NOT ('~a') matches CPU."""
    def build(spark):
        return unary_op_df(spark, data_gen).selectExpr('~a')
    assert_gpu_and_cpu_are_equal_collect(build)
Exemple #28
0
def test_cache_partial_load(data_gen, enableVectorizedConf):
    """cache + limit then projecting a subset of the cached columns matches CPU."""
    def build(spark):
        both_cols = two_col_df(spark, data_gen, string_gen).select(
            f.col("a"), f.col("b"))
        return both_cols.cache().limit(50).select(f.col("b"))
    assert_gpu_and_cpu_are_equal_collect(build, enableVectorizedConf)
Exemple #29
0
def test_cos(data_gen):
    """cos(a) matches CPU."""
    def build(spark):
        return unary_op_df(spark, data_gen).selectExpr('cos(a)')
    assert_gpu_and_cpu_are_equal_collect(build)
Exemple #30
0
def test_struct_get_item(data_gen):
    """Accessing struct fields by dotted name matches CPU."""
    def build(spark):
        return unary_op_df(spark, data_gen).selectExpr(
            'a.first', 'a.second', 'a.third')
    assert_gpu_and_cpu_are_equal_collect(build)