def _prep_func_for_compare(func, mode):
    """Wrap ``func`` with optional sort/limit steps and a final "bring back" action.

    Args:
        func: Callable taking a Spark session and returning a DataFrame.
        mode: How the result is brought back:
            ``'COLLECT'`` -> ``df.collect()``;
            ``'COUNT'`` -> ``df.count()``;
            ``'COLLECT_WITH_DATAFRAME'`` -> ``(df.collect(), df)``;
            any other value -> ``df.toLocalIterator()``.

    Returns:
        A ``(bring_back, collect_type)`` tuple, where ``bring_back`` is a
        callable taking a Spark session and ``collect_type`` is the label
        used in timing output (``'COLLECT'``, ``'COUNT'`` or ``'ITERATOR'``).

    Raises:
        RuntimeError: If the iterator mode is selected while
            ``should_sort_locally()`` is true.
    """
    sort_locally = should_sort_locally()

    if should_sort_on_spark():
        def sorted_func(spark):
            df = func(spark)
            # Sort on every column so the row order is fully determined.
            return df.sort(df.columns)
    else:
        sorted_func = func

    limit_val = get_limit()
    if limit_val > 0:
        def limit_func(spark):
            return sorted_func(spark).limit(limit_val)
    else:
        limit_func = sorted_func

    if mode == 'COLLECT':
        def bring_back(spark):
            return limit_func(spark).collect()
        collect_type = 'COLLECT'
    elif mode == 'COUNT':
        def bring_back(spark):
            return limit_func(spark).count()
        collect_type = 'COUNT'
    elif mode == 'COLLECT_WITH_DATAFRAME':
        # Callers that inspect the executed plan need the DataFrame itself in
        # addition to the collected rows, so hand both back as a pair.
        def bring_back(spark):
            df = limit_func(spark)
            return (df.collect(), df)
        collect_type = 'COLLECT'
    else:
        def bring_back(spark):
            return limit_func(spark).toLocalIterator()
        collect_type = 'ITERATOR'
        # An iterator cannot be sorted in place on the driver.
        if sort_locally:
            raise RuntimeError('Local Sort is only supported on a collect')

    return (bring_back, collect_type)
def _assert_gpu_and_cpu_writes_are_equal(write_func, read_func, base_path, mode, conf={}):
    """Write the same data in a CPU and a GPU session, read both back and compare.

    Args:
        write_func: Callable ``(spark, path)`` that writes the data under ``path``.
        read_func: Callable ``(spark, path)`` that returns a DataFrame read from ``path``.
        base_path: Directory prefix; output goes to ``base_path + '/CPU'`` and
            ``base_path + '/GPU'``.
        mode: Bring-back mode forwarded to ``_prep_func_for_compare``.
        conf: Configuration passed through ``_prep_incompat_conf`` and then to
            every session call.
    """
    conf = _prep_incompat_conf(conf)

    print('### CPU RUN ###')
    cpu_start = time.time()
    cpu_path = base_path + '/CPU'

    def write_on_cpu(spark):
        return write_func(spark, cpu_path)

    with_cpu_session(write_on_cpu, conf=conf)
    cpu_end = time.time()

    print('### GPU RUN ###')
    gpu_start = time.time()
    gpu_path = base_path + '/GPU'

    def write_on_gpu(spark):
        return write_func(spark, gpu_path)

    with_gpu_session(write_on_gpu, conf=conf)
    gpu_end = time.time()

    gpu_elapsed = gpu_end - gpu_start
    cpu_elapsed = cpu_end - cpu_start
    print('### WRITE: GPU TOOK {} CPU TOOK {} ###'.format(gpu_elapsed, cpu_elapsed))

    def read_cpu_output(spark):
        return read_func(spark, cpu_path)

    def read_gpu_output(spark):
        return read_func(spark, gpu_path)

    # The collect-type labels are not needed here, only the bring-back callables.
    cpu_bring_back, _ = _prep_func_for_compare(read_cpu_output, mode)
    gpu_bring_back, _ = _prep_func_for_compare(read_gpu_output, mode)

    # Both outputs, including the GPU-written one, are read back through
    # with_cpu_session.
    # NOTE(review): presumably deliberate so only the write path differs - confirm.
    from_cpu = with_cpu_session(cpu_bring_back, conf=conf)
    from_gpu = with_cpu_session(gpu_bring_back, conf=conf)

    if should_sort_locally():
        for rows in (from_cpu, from_gpu):
            rows.sort(key=_RowCmp)

    assert_equal(from_cpu, from_gpu)
def assert_gpu_fallback_collect(func, cpu_fallback_class_name, conf={}):
    """Collect ``func`` in a CPU and a GPU session, check the fallback and compare results.

    Args:
        func: Callable taking a Spark session and returning a DataFrame.
        cpu_fallback_class_name: Class name handed to
            ``ExecutionPlanCaptureCallback.assertCapturedAndGpuFellBack``.
        conf: Configuration passed through ``_prep_incompat_conf`` and then to
            both session calls.
    """
    prepared = _prep_func_for_compare(func, 'COLLECT')
    bring_back = prepared[0]
    collect_type = prepared[1]
    conf = _prep_incompat_conf(conf)

    print('### CPU RUN ###')
    cpu_start = time.time()
    from_cpu = with_cpu_session(bring_back, conf=conf)
    cpu_end = time.time()

    print('### GPU RUN ###')
    jvm = spark_jvm()
    # Capture is started before the GPU run so the plan it executes is recorded.
    jvm.com.nvidia.spark.rapids.ExecutionPlanCaptureCallback.startCapture()
    gpu_start = time.time()
    from_gpu = with_gpu_session(bring_back, conf=conf)
    gpu_end = time.time()
    jvm.com.nvidia.spark.rapids.ExecutionPlanCaptureCallback.assertCapturedAndGpuFellBack(
        cpu_fallback_class_name, 2000)

    gpu_elapsed = gpu_end - gpu_start
    cpu_elapsed = cpu_end - cpu_start
    print('### {}: GPU TOOK {} CPU TOOK {} ###'.format(collect_type, gpu_elapsed, cpu_elapsed))

    if should_sort_locally():
        for rows in (from_cpu, from_gpu):
            rows.sort(key=_RowCmp)

    assert_equal(from_cpu, from_gpu)
def assert_gpu_fallback_write(write_func, read_func, base_path, cpu_fallback_class_name, conf={}):
    """Write in a CPU and a GPU session, check the GPU fallback, then compare the outputs.

    Args:
        write_func: Callable ``(spark, path)`` that writes the data under ``path``.
        read_func: Callable ``(spark, path)`` that returns a DataFrame read from ``path``.
        base_path: Directory prefix; output goes to ``base_path + '/CPU'`` and
            ``base_path + '/GPU'``.
        cpu_fallback_class_name: Class name handed to
            ``ExecutionPlanCaptureCallback.assertCapturedAndGpuFellBack``.
        conf: Configuration passed through ``_prep_incompat_conf`` and then to
            every session call.
    """
    conf = _prep_incompat_conf(conf)

    print('### CPU RUN ###')
    cpu_start = time.time()
    cpu_path = base_path + '/CPU'

    def write_on_cpu(spark):
        return write_func(spark, cpu_path)

    with_cpu_session(write_on_cpu, conf=conf)
    cpu_end = time.time()

    print('### GPU RUN ###')
    jvm = spark_jvm()
    # Capture is started before the GPU write so the plan it executes is recorded.
    jvm.com.nvidia.spark.rapids.ExecutionPlanCaptureCallback.startCapture()
    gpu_start = time.time()
    gpu_path = base_path + '/GPU'

    def write_on_gpu(spark):
        return write_func(spark, gpu_path)

    with_gpu_session(write_on_gpu, conf=conf)
    gpu_end = time.time()
    jvm.com.nvidia.spark.rapids.ExecutionPlanCaptureCallback.assertCapturedAndGpuFellBack(
        cpu_fallback_class_name, 2000)

    gpu_elapsed = gpu_end - gpu_start
    cpu_elapsed = cpu_end - cpu_start
    print('### WRITE: GPU TOOK {} CPU TOOK {} ###'.format(gpu_elapsed, cpu_elapsed))

    def read_cpu_output(spark):
        return read_func(spark, cpu_path)

    def read_gpu_output(spark):
        return read_func(spark, gpu_path)

    # The collect-type labels are not needed here, only the bring-back callables.
    cpu_bring_back, _ = _prep_func_for_compare(read_cpu_output, 'COLLECT')
    gpu_bring_back, _ = _prep_func_for_compare(read_gpu_output, 'COLLECT')

    # Both outputs, including the GPU-written one, are read back through
    # with_cpu_session.
    # NOTE(review): presumably deliberate so only the write path differs - confirm.
    from_cpu = with_cpu_session(cpu_bring_back, conf=conf)
    from_gpu = with_cpu_session(gpu_bring_back, conf=conf)

    if should_sort_locally():
        for rows in (from_cpu, from_gpu):
            rows.sort(key=_RowCmp)

    assert_equal(from_cpu, from_gpu)
def run_with_cpu_and_gpu(func, mode, conf={}):
    """Run ``func`` in a CPU session and then a GPU session and return both results.

    Timings and results are kept in local variables rather than module
    globals, so nothing from a call outlives it and separate calls cannot
    overwrite each other's state.

    Args:
        func: Callable taking a Spark session and returning a DataFrame.
        mode: Bring-back mode forwarded to ``_prep_func_for_compare``.
        conf: Configuration passed through ``_prep_incompat_conf`` and then to
            both session calls.

    Returns:
        A ``(from_cpu, from_gpu)`` tuple of the brought-back results; both are
        sorted in place with ``_RowCmp`` when ``should_sort_locally()`` is true.
    """
    (bring_back, collect_type) = _prep_func_for_compare(func, mode)
    conf = _prep_incompat_conf(conf)

    def timed_run(banner, run_in_session):
        # Print the banner, execute bring_back through the given session
        # runner, and report the result together with the elapsed wall time.
        print(banner)
        started = time.time()
        result = run_in_session(bring_back, conf=conf)
        finished = time.time()
        return result, finished - started

    from_cpu, cpu_elapsed = timed_run('### CPU RUN ###', with_cpu_session)
    from_gpu, gpu_elapsed = timed_run('### GPU RUN ###', with_gpu_session)

    print('### {}: GPU TOOK {} CPU TOOK {} ###'.format(collect_type, gpu_elapsed, cpu_elapsed))

    if should_sort_locally():
        from_cpu.sort(key=_RowCmp)
        from_gpu.sort(key=_RowCmp)

    return (from_cpu, from_gpu)
def assert_cpu_and_gpu_are_equal_collect_with_capture(func, exist_classes='', non_exist_classes='', conf={}):
    """Compare CPU and GPU collect results and check classes in the GPU DataFrame's plan.

    The bring-back callable obtained for mode ``'COLLECT_WITH_DATAFRAME'`` is
    expected to return a ``(rows, dataframe)`` pair, which is unpacked below.

    Args:
        func: Callable taking a Spark session and returning a DataFrame.
        exist_classes: Comma-separated class names, each passed to
            ``ExecutionPlanCaptureCallback.assertContains``; an empty string
            skips the check.
        non_exist_classes: Comma-separated class names, each passed to
            ``ExecutionPlanCaptureCallback.assertNotContain``; an empty string
            skips the check.
        conf: Configuration passed through ``_prep_incompat_conf`` and then to
            both session calls.
    """
    prepared = _prep_func_for_compare(func, 'COLLECT_WITH_DATAFRAME')
    bring_back = prepared[0]
    collect_type = prepared[1]
    conf = _prep_incompat_conf(conf)

    print('### CPU RUN ###')
    cpu_start = time.time()
    from_cpu, cpu_df = with_cpu_session(bring_back, conf=conf)
    cpu_end = time.time()

    print('### GPU RUN ###')
    gpu_start = time.time()
    from_gpu, gpu_df = with_gpu_session(bring_back, conf=conf)
    gpu_end = time.time()

    jvm = spark_jvm()
    # The emptiness guards matter: ''.split(',') yields [''], which would
    # otherwise trigger a check for an empty class name.
    if exist_classes:
        for class_name in exist_classes.split(','):
            jvm.com.nvidia.spark.rapids.ExecutionPlanCaptureCallback.assertContains(
                gpu_df._jdf, class_name)
    if non_exist_classes:
        for class_name in non_exist_classes.split(','):
            jvm.com.nvidia.spark.rapids.ExecutionPlanCaptureCallback.assertNotContain(
                gpu_df._jdf, class_name)

    gpu_elapsed = gpu_end - gpu_start
    cpu_elapsed = cpu_end - cpu_start
    print('### {}: GPU TOOK {} CPU TOOK {} ###'.format(collect_type, gpu_elapsed, cpu_elapsed))

    if should_sort_locally():
        for rows in (from_cpu, from_gpu):
            rows.sort(key=_RowCmp)

    assert_equal(from_cpu, from_gpu)
def _assert_gpu_and_cpu_are_equal(func, should_collect, conf={}):
    """Run ``func`` in a CPU and a GPU session and assert both results are equal.

    Args:
        func: Callable taking a Spark session and returning a DataFrame.
        should_collect: Forwarded unchanged to ``_prep_func_for_compare`` as
            its ``mode`` argument.
        conf: Configuration passed through ``_prep_incompat_conf`` and then to
            both session calls.
    """
    prepared = _prep_func_for_compare(func, should_collect)
    bring_back = prepared[0]
    collect_type = prepared[1]
    conf = _prep_incompat_conf(conf)

    print('### CPU RUN ###')
    cpu_start = time.time()
    from_cpu = with_cpu_session(bring_back, conf=conf)
    cpu_end = time.time()

    print('### GPU RUN ###')
    gpu_start = time.time()
    from_gpu = with_gpu_session(bring_back, conf=conf)
    gpu_end = time.time()

    gpu_elapsed = gpu_end - gpu_start
    cpu_elapsed = cpu_end - cpu_start
    print('### {}: GPU TOOK {} CPU TOOK {} ###'.format(collect_type, gpu_elapsed, cpu_elapsed))

    if should_sort_locally():
        for rows in (from_cpu, from_gpu):
            rows.sort(key=_RowCmp)

    assert_equal(from_cpu, from_gpu)