assert SCALE_FACTOR > 0, "SCALE_FACTOR must be > 0."

# If set, removes the first N trials for each test from all reported statistics. Useful for
# tests which have outlier behavior due to JIT and other system cache warm-ups. If any test
# returns fewer than N + 1 results, an exception is thrown.
IGNORED_TRIALS = 2

# Command used to launch Scala or Java.

# Set up OptionSets. Note that a giant cross product is done over all JavaOptionSets + OptionSets
# passed to each test, which may be combinations of those set up here.

# Java options.
COMMON_JAVA_OPTS = [
    # Fraction of JVM memory used for caching RDDs.
    JavaOptionSet("spark.storage.memoryFraction", [0.66]),
    JavaOptionSet("spark.serializer", ["org.apache.spark.serializer.JavaSerializer"]),
    JavaOptionSet("spark.executor.memory", ["16g"]),
    # Turn event logging on in order to better diagnose failed tests. Off by default as it
    # crashes releases prior to 1.0.2.
    JavaOptionSet("spark.eventLog.enabled", [True]),
    JavaOptionSet("spark.eventLog.dir", ["file:///root/spark-logs"]),
    # To ensure consistency across runs, we disable delay scheduling.
    JavaOptionSet("spark.locality.wait", [str(60 * 1000 * 1000)])
]

# Set driver memory here.
SPARK_DRIVER_MEMORY = "16g"

# The following option value sets are shared among all tests.
COMMON_OPTS = [
    # How many times to run each experiment - used to warm up system caches.
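# Illustrative sketch only (hypothetical helper, not the harness's actual implementation):
# the IGNORED_TRIALS behavior described above amounts to dropping the first N results per
# test and refusing to report anything unless at least N + 1 results came back.
def _example_trim_warmup_trials(results, ignored_trials=IGNORED_TRIALS):
    """Drop the first `ignored_trials` results; raise if too few results were returned."""
    if len(results) < ignored_trials + 1:
        raise Exception("Test returned %d results; need at least %d" %
                        (len(results), ignored_trials + 1))
    return results[ignored_trials:]

# Example: with IGNORED_TRIALS = 2, only the last three of five trial times are reported.
assert _example_trim_warmup_trials([9.8, 7.1, 5.2, 5.1, 5.0], 2) == [5.2, 5.1, 5.0]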
# The following function generates options for setting batch duration in streaming tests.
def streaming_batch_duration_opts(duration):
    return [OptionSet("batch-duration", [duration])]

# The following function generates options for setting window duration in streaming tests.
def streaming_window_duration_opts(duration):
    return [OptionSet("window-duration", [duration])]

STREAMING_COMMON_OPTS = [
    OptionSet("total-duration", [60]),
    # OptionSet("hdfs-url", [HDFS_URL]),
]

STREAMING_COMMON_JAVA_OPTS = [
    # Fraction of JVM memory used for caching RDDs.
    JavaOptionSet("spark.storage.memoryFraction", [0.66]),
    JavaOptionSet("spark.serializer", ["org.apache.spark.serializer.JavaSerializer"]),
    # JavaOptionSet("spark.executor.memory", ["9g"]),
    JavaOptionSet("spark.executor.extraJavaOptions", [" -XX:+UseConcMarkSweepGC "])
]

STREAMING_KEY_VAL_TEST_OPTS = STREAMING_COMMON_OPTS + streaming_batch_duration_opts(2000) + [
    # Number of input streams.
    OptionSet("num-streams", [1], can_scale=True),
    # Number of records per second per input stream.
    OptionSet("records-per-sec", [10 * 1000]),
    # Number of reduce tasks.
    OptionSet("reduce-tasks", [10], can_scale=True),
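# Illustrative usage sketch (hypothetical variable, not an actual test definition from this
# suite): because each streaming_*_opts helper returns a single-element list, a test's full
# option list can be assembled by plain list concatenation, exactly as
# STREAMING_KEY_VAL_TEST_OPTS does above. For example, a windowed variant with a 10-second
# window over 2-second batches might be built like this:
_example_windowed_opts = (STREAMING_COMMON_OPTS +
                          streaming_batch_duration_opts(2000) +
                          streaming_window_duration_opts(10 * 1000) +
                          [OptionSet("reduce-tasks", [10], can_scale=True)])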
assert SCALE_FACTOR > 0, "SCALE_FACTOR must be > 0."

# If set, removes the first N trials for each test from all reported statistics. Useful for
# tests which have outlier behavior due to JIT and other system cache warm-ups. If any test
# returns fewer than N + 1 results, an exception is thrown.
IGNORED_TRIALS = 1

# Command used to launch Scala or Java.

# Set up OptionSets. Note that a giant cross product is done over all JavaOptionSets + OptionSets
# passed to each test, which may be combinations of those set up here.

# Java options.
COMMON_JAVA_OPTS = [
    # Fraction of JVM memory used for caching RDDs.
    JavaOptionSet("spark.storage.memoryFraction", [0.66]),
    JavaOptionSet("spark.serializer", ["org.apache.spark.serializer.KryoSerializer"]),
    JavaOptionSet("spark.executor.memory", ["200g"]),
    # Turn event logging on in order to better diagnose failed tests. Off by default as it
    # crashes releases prior to 1.0.2.
    # JavaOptionSet("spark.eventLog.enabled", [True]),
    # To ensure consistency across runs, we disable delay scheduling.
    JavaOptionSet("spark.locality.wait", [str(60 * 1000 * 1000)])
]

# Set driver memory here.
SPARK_DRIVER_MEMORY = "20g"

# The following option value sets are shared among all tests.
COMMON_OPTS = [
    # How many times to run each experiment - used to warm up system caches.
    # This OptionSet should probably only have a single value (i.e., length 1)
    # since it doesn't make sense to have multiple values here.
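# Illustrative sketch only (hypothetical helper, not the harness's actual expansion code):
# the "giant cross product" mentioned above means every combination of values across the
# supplied option sets becomes its own test configuration, e.g. 2 serializers x 2 executor
# memory sizes yields 4 distinct runs.
import itertools

def _example_expand_option_sets(option_sets):
    """Yield one {flag: value} dict per combination of values in `option_sets`."""
    names = [name for name, _ in option_sets]
    value_lists = [values for _, values in option_sets]
    for combo in itertools.product(*value_lists):
        yield dict(zip(names, combo))

assert len(list(_example_expand_option_sets([
    ("spark.serializer", ["org.apache.spark.serializer.JavaSerializer",
                          "org.apache.spark.serializer.KryoSerializer"]),
    ("spark.executor.memory", ["16g", "200g"]),
]))) == 4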