def streaming_window_duration_opts(duration):
    return [OptionSet("window-duration", [duration])]
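
# For example, streaming_window_duration_opts(10000) evaluates to
# [OptionSet("window-duration", [10000])]: the duration is wrapped in a single-valued
# OptionSet so it can be combined (and cross-producted) with the other option sets
# configured below.
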
# If set, removes the first N trials for each test from all reported statistics. Useful for
# tests which have outlier behavior due to JIT and other system cache warm-ups. If any test
# returns fewer than N + 1 results, an exception is thrown.
IGNORED_TRIALS = 2

# Command used to launch Scala or Java.

# Set up OptionSets. Note that a giant cross product is done over all JavaOptionSets +
# OptionSets passed to each test, which may be combinations of those set up here (a small
# illustration appears after this block).

# The following options value sets are shared among all tests.
COMMON_JAVA_OPTS = []
COMMON_OPTS = [
    # How many times to run each experiment - used to warm up system caches.
    OptionSet("num-trials", [10])
]

# The following options value sets are shared among all tests of
# operations on key-value data.
SPARK_KEY_VAL_TEST_OPTS = [
    # The number of input partitions.
    OptionSet("num-partitions", [400], can_scale=True),
    # The number of reduce tasks.
    OptionSet("reduce-tasks", [400], can_scale=True),
    # A random seed to make tests reproducible.
    OptionSet("random-seed", [5]),
    # Input persistence strategy (can be "memory", "disk", or "hdfs").
    # NOTE: If "hdfs" is selected, datasets will be re-used across runs of
    # this script. This means parameters here are effectively ignored if
    # an existing input dataset is present.
]
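
# A minimal, standalone sketch of the cross product mentioned above (illustration only;
# nothing in the harness calls this, and the axis names and values are hypothetical).
# Each OptionSet contributes one axis of values, and every combination of values across
# all OptionSets becomes one test configuration.
def _cross_product_illustration():
    # Two hypothetical axes with two values each yield 2 x 2 = 4 test configurations.
    return [{"num-partitions": p, "reduce-tasks": r}
            for p in [400, 800]
            for r in [400, 800]]
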
def streaming_batch_duration_opts(duration):
    return [OptionSet("batch-duration", [duration])]
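
# A hypothetical usage sketch for the two helper functions above: each returns a plain
# list containing a single OptionSet, so their results can be concatenated with any other
# list of OptionSets when a streaming test's options are assembled. The variable below is
# illustrative only and is not read by the harness; the duration values are arbitrary.
EXAMPLE_STREAMING_DURATION_OPTS = (streaming_batch_duration_opts(1000) +
                                   streaming_window_duration_opts(10000))
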
JavaOptionSet("spark.serializer", ["org.apache.spark.serializer.KryoSerializer"]), JavaOptionSet("spark.executor.memory", ["200g"]), # Turn event logging on in order better diagnose failed tests. Off by default as it crashes # releases prior to 1.0.2 # JavaOptionSet("spark.eventLog.enabled", [True]), # To ensure consistency across runs, we disable delay scheduling JavaOptionSet("spark.locality.wait", [str(60 * 1000 * 1000)]) ] # Set driver memory here SPARK_DRIVER_MEMORY = "20g" # The following options value sets are shared among all tests. COMMON_OPTS = [ # How many times to run each experiment - used to warm up system caches. # This OptionSet should probably only have a single value (i.e., length 1) # since it doesn't make sense to have multiple values here. OptionSet("num-trials", [2]), # Extra pause added between trials, in seconds. For runs with large amounts # of shuffle data, this gives time for buffer cache write-back. OptionSet("inter-trial-wait", [3]) ] # The following options value sets are shared among all tests of # operations on key-value data. SPARK_KEY_VAL_TEST_OPTS = [ # The number of input partitions. OptionSet("num-partitions", [400], can_scale=True), # The number of reduce tasks. OptionSet("reduce-tasks", [400], can_scale=True), # A random seed to make tests reproducable. OptionSet("random-seed", [5]), # Input persistence strategy (can be "memory", "disk", or "hdfs").