Ejemplo n.º 1
0
 def do_join(spark):
     """Build, cache and materialize a broadcast-hinted join.

     The two DataFrames returned by ``create_df`` are joined on
     ``left.a == right.r_a`` using ``join_type`` (a name defined outside
     this function), with a ``broadcast`` hint applied to the right side.

     Returns the cached join result.
     """
     # NOTE(review): the two 500s are presumably the row counts of each
     # side -- confirm against create_df.
     lhs, rhs = create_df(spark, data_gen, 500, 500)
     hinted_rhs = rhs.hint("broadcast")
     join_cond = lhs.a == rhs.r_a
     joined = lhs.join(hinted_rhs, join_cond, join_type)
     result = joined.cache()
     # cache() is lazy; run an action so the cached data is computed now.
     result.count()
     return result
Ejemplo n.º 2
0
 def do_join(spark):
     """Join the two generated DataFrames, cache the result and fill the cache.

     The join condition is ``left.a == right.r_a`` and the join kind comes
     from ``join_type`` (a name defined outside this function).

     Returns the cached join result.
     """
     frames = create_df(spark, data_gen, 500, 500)
     lhs, rhs = frames
     result = (
         lhs
         .join(rhs, lhs.a == rhs.r_a, join_type)
         .cache()
     )
     # Trigger an action so the cache is populated before handing it back.
     result.count()
     return result
Ejemplo n.º 3
0
 def do_join(spark):
     """Cache a join, populate the cache, then filter the cached rows.

     The two DataFrames returned by ``create_df`` are joined on
     ``left.a == right.r_a`` using ``join_type`` (a name defined outside
     this function). The joined result is cached and counted, and a
     filter is then applied on top of the cached DataFrame.

     Returns the cached join restricted to rows where ``a`` is not null.
     """
     lhs, rhs = create_df(spark, data, 500, 500)
     join_cond = lhs.a == rhs.r_a
     joined = lhs.join(rhs, join_cond, join_type)
     cached_join = joined.cache()
     # The count() action populates the cache before the filter is applied.
     cached_join.count()
     non_null_rows = cached_join.filter("a is not null")
     return non_null_rows
Ejemplo n.º 4
0
 def do_join(spark):
     """Build, cache and materialize a broadcast-hinted cross join.

     The left DataFrame returned by ``create_df`` is cross-joined with the
     right one, which carries a ``broadcast`` hint.

     Returns the cached cross-join result.
     """
     # NOTE(review): 50 and 25 are presumably the row counts of the two
     # sides -- confirm against create_df.
     lhs, rhs = create_df(spark, data_gen, 50, 25)
     hinted_rhs = rhs.hint("broadcast")
     product = lhs.crossJoin(hinted_rhs)
     result = product.cache()
     # cache() is lazy; run an action so the cached data is computed now.
     result.count()
     return result